1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2005 - 2016 Broadcom
4 * All rights reserved.
5 *
6 * Contact Information:
7 * linux-drivers@emulex.com
8 *
9 * Emulex
10 * 3333 Susan Street
11 * Costa Mesa, CA 92626
12 */
13
14#include <linux/prefetch.h>
15#include <linux/module.h>
16#include "be.h"
17#include "be_cmds.h"
18#include <asm/div64.h>
19#include <linux/if_bridge.h>
20#include <net/busy_poll.h>
21#include <net/vxlan.h>
22
23MODULE_DESCRIPTION(DRV_DESC);
24MODULE_AUTHOR("Emulex Corporation");
25MODULE_LICENSE("GPL");
26
27/* num_vfs module param is obsolete.
28 * Use sysfs method to enable/disable VFs.
29 */
30static unsigned int num_vfs;
31module_param(num_vfs, uint, 0444);
32MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
33
34static ushort rx_frag_size = 2048;
35module_param(rx_frag_size, ushort, 0444);
36MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
37
38/* Per-module error detection/recovery workq shared across all functions.
39 * Each function schedules its own work request on this shared workq.
40 */
41static struct workqueue_struct *be_err_recovery_workq;
42
43static const struct pci_device_id be_dev_ids[] = {
44#ifdef CONFIG_BE2NET_BE2
45	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
46	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
47#endif /* CONFIG_BE2NET_BE2 */
48#ifdef CONFIG_BE2NET_BE3
49	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
50	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
51#endif /* CONFIG_BE2NET_BE3 */
52#ifdef CONFIG_BE2NET_LANCER
53	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
54	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
55#endif /* CONFIG_BE2NET_LANCER */
56#ifdef CONFIG_BE2NET_SKYHAWK
57	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
58	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
59#endif /* CONFIG_BE2NET_SKYHAWK */
60	{ 0 }
61};
62MODULE_DEVICE_TABLE(pci, be_dev_ids);
63
/* Workqueue used by all functions for deferring cmd calls to the adapter */
65static struct workqueue_struct *be_wq;
66
67/* UE Status Low CSR */
68static const char * const ue_status_low_desc[] = {
69	"CEV",
70	"CTX",
71	"DBUF",
72	"ERX",
73	"Host",
74	"MPU",
75	"NDMA",
76	"PTC ",
77	"RDMA ",
78	"RXF ",
79	"RXIPS ",
80	"RXULP0 ",
81	"RXULP1 ",
82	"RXULP2 ",
83	"TIM ",
84	"TPOST ",
85	"TPRE ",
86	"TXIPS ",
87	"TXULP0 ",
88	"TXULP1 ",
89	"UC ",
90	"WDMA ",
91	"TXULP2 ",
92	"HOST1 ",
93	"P0_OB_LINK ",
94	"P1_OB_LINK ",
95	"HOST_GPIO ",
96	"MBOX ",
97	"ERX2 ",
98	"SPARE ",
99	"JTAG ",
100	"MPU_INTPEND "
101};
102
103/* UE Status High CSR */
104static const char * const ue_status_hi_desc[] = {
105	"LPCMEMHOST",
106	"MGMT_MAC",
107	"PCS0ONLINE",
108	"MPU_IRAM",
109	"PCS1ONLINE",
110	"PCTL0",
111	"PCTL1",
112	"PMEM",
113	"RR",
114	"TXPB",
115	"RXPP",
116	"XAUI",
117	"TXP",
118	"ARM",
119	"IPC",
120	"HOST2",
121	"HOST3",
122	"HOST4",
123	"HOST5",
124	"HOST6",
125	"HOST7",
126	"ECRC",
127	"Poison TLP",
128	"NETC",
129	"PERIPH",
130	"LLTXULP",
131	"D2P",
132	"RCON",
133	"LDMA",
134	"LLTXP",
135	"LLTXPB",
136	"Unknown"
137};
138
139#define BE_VF_IF_EN_FLAGS	(BE_IF_FLAGS_UNTAGGED | \
140				 BE_IF_FLAGS_BROADCAST | \
141				 BE_IF_FLAGS_MULTICAST | \
142				 BE_IF_FLAGS_PASS_L3L4_ERRORS)
143
144static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
145{
146	struct be_dma_mem *mem = &q->dma_mem;
147
148	if (mem->va) {
149		dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
150				  mem->dma);
151		mem->va = NULL;
152	}
153}
154
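/* Allocate coherent DMA memory for a ring of @len entries of @entry_size
 * bytes each and record the ring geometry in @q; be_queue_free() above
 * releases it.
 */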
155static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
156			  u16 len, u16 entry_size)
157{
158	struct be_dma_mem *mem = &q->dma_mem;
159
160	memset(q, 0, sizeof(*q));
161	q->len = len;
162	q->entry_size = entry_size;
163	mem->size = len * entry_size;
164	mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size,
165				     &mem->dma, GFP_KERNEL);
166	if (!mem->va)
167		return -ENOMEM;
168	return 0;
169}
170
171static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
172{
173	u32 reg, enabled;
174
175	pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
176			      &reg);
177	enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
178
179	if (!enabled && enable)
180		reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181	else if (enabled && !enable)
182		reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
183	else
184		return;
185
186	pci_write_config_dword(adapter->pdev,
187			       PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
188}
189
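/* Enable/disable host interrupt delivery. The be_cmd_intr_set() FW cmd is
 * tried first; on failure, fall back to toggling the HOSTINTR bit in the
 * membar control register via PCI config space (be_reg_intr_set()).
 * Not applicable to Lancer.
 */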
190static void be_intr_set(struct be_adapter *adapter, bool enable)
191{
192	int status = 0;
193
	/* On Lancer, interrupts can't be controlled via this register */
195	if (lancer_chip(adapter))
196		return;
197
198	if (be_check_error(adapter, BE_ERROR_EEH))
199		return;
200
201	status = be_cmd_intr_set(adapter, enable);
202	if (status)
203		be_reg_intr_set(adapter, enable);
204}
205
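/* Ring the RX queue doorbell to tell HW that @posted buffers have been added
 * to queue @qid; e.g. posting 64 buffers on qid 5 writes the value
 * (5 | 64 << DB_RQ_NUM_POSTED_SHIFT) to the DB_RQ_OFFSET doorbell. The wmb()
 * ensures the descriptors are visible in memory before the doorbell write
 * reaches the adapter.
 */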
206static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
207{
208	u32 val = 0;
209
210	if (be_check_error(adapter, BE_ERROR_HW))
211		return;
212
213	val |= qid & DB_RQ_RING_ID_MASK;
214	val |= posted << DB_RQ_NUM_POSTED_SHIFT;
215
216	wmb();
217	iowrite32(val, adapter->db + DB_RQ_OFFSET);
218}
219
220static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
221			  u16 posted)
222{
223	u32 val = 0;
224
225	if (be_check_error(adapter, BE_ERROR_HW))
226		return;
227
228	val |= txo->q.id & DB_TXULP_RING_ID_MASK;
229	val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
230
231	wmb();
232	iowrite32(val, adapter->db + txo->db_offset);
233}
234
235static void be_eq_notify(struct be_adapter *adapter, u16 qid,
236			 bool arm, bool clear_int, u16 num_popped,
237			 u32 eq_delay_mult_enc)
238{
239	u32 val = 0;
240
241	val |= qid & DB_EQ_RING_ID_MASK;
242	val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
243
244	if (be_check_error(adapter, BE_ERROR_HW))
245		return;
246
247	if (arm)
248		val |= 1 << DB_EQ_REARM_SHIFT;
249	if (clear_int)
250		val |= 1 << DB_EQ_CLR_SHIFT;
251	val |= 1 << DB_EQ_EVNT_SHIFT;
252	val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
253	val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
254	iowrite32(val, adapter->db + DB_EQ_OFFSET);
255}
256
257void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
258{
259	u32 val = 0;
260
261	val |= qid & DB_CQ_RING_ID_MASK;
262	val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
263			DB_CQ_RING_ID_EXT_MASK_SHIFT);
264
265	if (be_check_error(adapter, BE_ERROR_HW))
266		return;
267
268	if (arm)
269		val |= 1 << DB_CQ_REARM_SHIFT;
270	val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
271	iowrite32(val, adapter->db + DB_CQ_OFFSET);
272}
273
274static int be_dev_mac_add(struct be_adapter *adapter, const u8 *mac)
275{
276	int i;
277
278	/* Check if mac has already been added as part of uc-list */
279	for (i = 0; i < adapter->uc_macs; i++) {
280		if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
281			/* mac already added, skip addition */
282			adapter->pmac_id[0] = adapter->pmac_id[i + 1];
283			return 0;
284		}
285	}
286
287	return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
288			       &adapter->pmac_id[0], 0);
289}
290
291static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
292{
293	int i;
294
295	/* Skip deletion if the programmed mac is
296	 * being used in uc-list
297	 */
298	for (i = 0; i < adapter->uc_macs; i++) {
299		if (adapter->pmac_id[i + 1] == pmac_id)
300			return;
301	}
302	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
303}
304
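/* ndo_set_mac_address handler: program the new MAC on the interface, then
 * query the FW for the currently active MAC to confirm the change really
 * took effect (a VF lacking the FILTMGMT privilege may not be permitted to
 * change it), and only then update netdev->dev_addr.
 */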
305static int be_mac_addr_set(struct net_device *netdev, void *p)
306{
307	struct be_adapter *adapter = netdev_priv(netdev);
308	struct device *dev = &adapter->pdev->dev;
309	struct sockaddr *addr = p;
310	int status;
311	u8 mac[ETH_ALEN];
312	u32 old_pmac_id = adapter->pmac_id[0];
313
314	if (!is_valid_ether_addr(addr->sa_data))
315		return -EADDRNOTAVAIL;
316
	/* Proceed further only if the user-provided MAC is different
	 * from the active MAC
	 */
320	if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
321		return 0;
322
	/* BE3 VFs without the FILTMGMT privilege are not allowed to set their
	 * own MAC address
	 */
326	if (BEx_chip(adapter) && be_virtfn(adapter) &&
327	    !check_privilege(adapter, BE_PRIV_FILTMGMT))
328		return -EPERM;
329
330	/* if device is not running, copy MAC to netdev->dev_addr */
331	if (!netif_running(netdev))
332		goto done;
333
334	/* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
335	 * privilege or if PF did not provision the new MAC address.
336	 * On BE3, this cmd will always fail if the VF doesn't have the
337	 * FILTMGMT privilege. This failure is OK, only if the PF programmed
338	 * the MAC for the VF.
339	 */
340	mutex_lock(&adapter->rx_filter_lock);
341	status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
342	if (!status) {
344		/* Delete the old programmed MAC. This call may fail if the
345		 * old MAC was already deleted by the PF driver.
346		 */
347		if (adapter->pmac_id[0] != old_pmac_id)
348			be_dev_mac_del(adapter, old_pmac_id);
349	}
350
351	mutex_unlock(&adapter->rx_filter_lock);
352	/* Decide if the new MAC is successfully activated only after
353	 * querying the FW
354	 */
355	status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
356				       adapter->if_handle, true, 0);
357	if (status)
358		goto err;
359
	/* The MAC change did not happen, either due to lack of privilege
	 * or because the PF did not pre-provision the new MAC.
	 */
363	if (!ether_addr_equal(addr->sa_data, mac)) {
364		status = -EPERM;
365		goto err;
366	}
367
368	/* Remember currently programmed MAC */
369	ether_addr_copy(adapter->dev_mac, addr->sa_data);
370done:
371	eth_hw_addr_set(netdev, addr->sa_data);
372	dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
373	return 0;
374err:
375	dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
376	return status;
377}
378
379/* BE2 supports only v0 cmd */
380static void *hw_stats_from_cmd(struct be_adapter *adapter)
381{
382	if (BE2_chip(adapter)) {
383		struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
384
385		return &cmd->hw_stats;
386	} else if (BE3_chip(adapter)) {
387		struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
388
389		return &cmd->hw_stats;
390	} else {
391		struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
392
393		return &cmd->hw_stats;
394	}
395}
396
397/* BE2 supports only v0 cmd */
398static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
399{
400	if (BE2_chip(adapter)) {
401		struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
402
403		return &hw_stats->erx;
404	} else if (BE3_chip(adapter)) {
405		struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
406
407		return &hw_stats->erx;
408	} else {
409		struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
410
411		return &hw_stats->erx;
412	}
413}
414
415static void populate_be_v0_stats(struct be_adapter *adapter)
416{
417	struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
418	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
419	struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
420	struct be_port_rxf_stats_v0 *port_stats =
421					&rxf_stats->port[adapter->port_num];
422	struct be_drv_stats *drvs = &adapter->drv_stats;
423
424	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
425	drvs->rx_pause_frames = port_stats->rx_pause_frames;
426	drvs->rx_crc_errors = port_stats->rx_crc_errors;
427	drvs->rx_control_frames = port_stats->rx_control_frames;
428	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
429	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
430	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
431	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
432	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
433	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
434	drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
435	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
436	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
437	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
438	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
439	drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
440	drvs->rx_dropped_header_too_small =
441		port_stats->rx_dropped_header_too_small;
442	drvs->rx_address_filtered =
443					port_stats->rx_address_filtered +
444					port_stats->rx_vlan_filtered;
445	drvs->rx_alignment_symbol_errors =
446		port_stats->rx_alignment_symbol_errors;
447
448	drvs->tx_pauseframes = port_stats->tx_pauseframes;
449	drvs->tx_controlframes = port_stats->tx_controlframes;
450
451	if (adapter->port_num)
452		drvs->jabber_events = rxf_stats->port1_jabber_events;
453	else
454		drvs->jabber_events = rxf_stats->port0_jabber_events;
455	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
456	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
457	drvs->forwarded_packets = rxf_stats->forwarded_packets;
458	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
459	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
460	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
461	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
462}
463
464static void populate_be_v1_stats(struct be_adapter *adapter)
465{
466	struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
467	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
468	struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
469	struct be_port_rxf_stats_v1 *port_stats =
470					&rxf_stats->port[adapter->port_num];
471	struct be_drv_stats *drvs = &adapter->drv_stats;
472
473	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
474	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
475	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
476	drvs->rx_pause_frames = port_stats->rx_pause_frames;
477	drvs->rx_crc_errors = port_stats->rx_crc_errors;
478	drvs->rx_control_frames = port_stats->rx_control_frames;
479	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
480	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
481	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
482	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
483	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
484	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
485	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
486	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
487	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
488	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
489	drvs->rx_dropped_header_too_small =
490		port_stats->rx_dropped_header_too_small;
491	drvs->rx_input_fifo_overflow_drop =
492		port_stats->rx_input_fifo_overflow_drop;
493	drvs->rx_address_filtered = port_stats->rx_address_filtered;
494	drvs->rx_alignment_symbol_errors =
495		port_stats->rx_alignment_symbol_errors;
496	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
497	drvs->tx_pauseframes = port_stats->tx_pauseframes;
498	drvs->tx_controlframes = port_stats->tx_controlframes;
499	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
500	drvs->jabber_events = port_stats->jabber_events;
501	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
502	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
503	drvs->forwarded_packets = rxf_stats->forwarded_packets;
504	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
505	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
506	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
507	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
508}
509
510static void populate_be_v2_stats(struct be_adapter *adapter)
511{
512	struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
513	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
514	struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
515	struct be_port_rxf_stats_v2 *port_stats =
516					&rxf_stats->port[adapter->port_num];
517	struct be_drv_stats *drvs = &adapter->drv_stats;
518
519	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
520	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
521	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
522	drvs->rx_pause_frames = port_stats->rx_pause_frames;
523	drvs->rx_crc_errors = port_stats->rx_crc_errors;
524	drvs->rx_control_frames = port_stats->rx_control_frames;
525	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
526	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
527	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
528	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
529	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
530	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
531	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
532	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
533	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
534	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
535	drvs->rx_dropped_header_too_small =
536		port_stats->rx_dropped_header_too_small;
537	drvs->rx_input_fifo_overflow_drop =
538		port_stats->rx_input_fifo_overflow_drop;
539	drvs->rx_address_filtered = port_stats->rx_address_filtered;
540	drvs->rx_alignment_symbol_errors =
541		port_stats->rx_alignment_symbol_errors;
542	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
543	drvs->tx_pauseframes = port_stats->tx_pauseframes;
544	drvs->tx_controlframes = port_stats->tx_controlframes;
545	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
546	drvs->jabber_events = port_stats->jabber_events;
547	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
548	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
549	drvs->forwarded_packets = rxf_stats->forwarded_packets;
550	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
551	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
552	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
553	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
554	if (be_roce_supported(adapter)) {
555		drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
556		drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
557		drvs->rx_roce_frames = port_stats->roce_frames_received;
558		drvs->roce_drops_crc = port_stats->roce_drops_crc;
559		drvs->roce_drops_payload_len =
560			port_stats->roce_drops_payload_len;
561	}
562}
563
564static void populate_lancer_stats(struct be_adapter *adapter)
565{
566	struct be_drv_stats *drvs = &adapter->drv_stats;
567	struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
568
569	be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
570	drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
571	drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
572	drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
573	drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
574	drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
575	drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
576	drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
577	drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
578	drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
579	drvs->rx_dropped_tcp_length =
580				pport_stats->rx_dropped_invalid_tcp_length;
581	drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
582	drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
583	drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
584	drvs->rx_dropped_header_too_small =
585				pport_stats->rx_dropped_header_too_small;
586	drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
587	drvs->rx_address_filtered =
588					pport_stats->rx_address_filtered +
589					pport_stats->rx_vlan_filtered;
590	drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
591	drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
592	drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
593	drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
594	drvs->jabber_events = pport_stats->rx_jabbers;
595	drvs->forwarded_packets = pport_stats->num_forwards_lo;
596	drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
597	drvs->rx_drops_too_many_frags =
598				pport_stats->rx_drops_too_many_frags_lo;
599}
600
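/* Fold a 16-bit HW counter into a 32-bit SW accumulator, compensating for
 * counter wrap-around. Illustrative values (not from HW): if *acc is
 * 0x0001fff0 and the counter now reads 0x0010, the counter wrapped
 * (0x0010 < 0xfff0), so the new value is
 * 0x00010000 + 0x0010 + 0x10000 = 0x00020010.
 */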
601static void accumulate_16bit_val(u32 *acc, u16 val)
602{
603#define lo(x)			(x & 0xFFFF)
604#define hi(x)			(x & 0xFFFF0000)
605	bool wrapped = val < lo(*acc);
606	u32 newacc = hi(*acc) + val;
607
608	if (wrapped)
609		newacc += 65536;
610	WRITE_ONCE(*acc, newacc);
611}
612
613static void populate_erx_stats(struct be_adapter *adapter,
614			       struct be_rx_obj *rxo, u32 erx_stat)
615{
616	if (!BEx_chip(adapter))
617		rx_stats(rxo)->rx_drops_no_frags = erx_stat;
618	else
		/* The erx HW counter below can wrap around after 65535;
		 * the driver accumulates it into a 32-bit value.
		 */
622		accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
623				     (u16)erx_stat);
624}
625
626void be_parse_stats(struct be_adapter *adapter)
627{
628	struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
629	struct be_rx_obj *rxo;
630	int i;
631	u32 erx_stat;
632
633	if (lancer_chip(adapter)) {
634		populate_lancer_stats(adapter);
635	} else {
636		if (BE2_chip(adapter))
637			populate_be_v0_stats(adapter);
638		else if (BE3_chip(adapter))
639			/* for BE3 */
640			populate_be_v1_stats(adapter);
641		else
642			populate_be_v2_stats(adapter);
643
644		/* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
645		for_all_rx_queues(adapter, rxo, i) {
646			erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
647			populate_erx_stats(adapter, rxo, erx_stat);
648		}
649	}
650}
651
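/* ndo_get_stats64 handler: sum the per-queue SW counters (sampled
 * consistently via the u64_stats seqcount) and fold in the FW-reported
 * error counters cached in adapter->drv_stats.
 */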
652static void be_get_stats64(struct net_device *netdev,
653			   struct rtnl_link_stats64 *stats)
654{
655	struct be_adapter *adapter = netdev_priv(netdev);
656	struct be_drv_stats *drvs = &adapter->drv_stats;
657	struct be_rx_obj *rxo;
658	struct be_tx_obj *txo;
659	u64 pkts, bytes;
660	unsigned int start;
661	int i;
662
663	for_all_rx_queues(adapter, rxo, i) {
664		const struct be_rx_stats *rx_stats = rx_stats(rxo);
665
666		do {
667			start = u64_stats_fetch_begin(&rx_stats->sync);
668			pkts = rx_stats(rxo)->rx_pkts;
669			bytes = rx_stats(rxo)->rx_bytes;
670		} while (u64_stats_fetch_retry(&rx_stats->sync, start));
671		stats->rx_packets += pkts;
672		stats->rx_bytes += bytes;
673		stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
674		stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
675					rx_stats(rxo)->rx_drops_no_frags;
676	}
677
678	for_all_tx_queues(adapter, txo, i) {
679		const struct be_tx_stats *tx_stats = tx_stats(txo);
680
681		do {
682			start = u64_stats_fetch_begin(&tx_stats->sync);
683			pkts = tx_stats(txo)->tx_pkts;
684			bytes = tx_stats(txo)->tx_bytes;
685		} while (u64_stats_fetch_retry(&tx_stats->sync, start));
686		stats->tx_packets += pkts;
687		stats->tx_bytes += bytes;
688	}
689
690	/* bad pkts received */
691	stats->rx_errors = drvs->rx_crc_errors +
692		drvs->rx_alignment_symbol_errors +
693		drvs->rx_in_range_errors +
694		drvs->rx_out_range_errors +
695		drvs->rx_frame_too_long +
696		drvs->rx_dropped_too_small +
697		drvs->rx_dropped_too_short +
698		drvs->rx_dropped_header_too_small +
699		drvs->rx_dropped_tcp_length +
700		drvs->rx_dropped_runt;
701
702	/* detailed rx errors */
703	stats->rx_length_errors = drvs->rx_in_range_errors +
704		drvs->rx_out_range_errors +
705		drvs->rx_frame_too_long;
706
707	stats->rx_crc_errors = drvs->rx_crc_errors;
708
709	/* frame alignment errors */
710	stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
711
712	/* receiver fifo overrun */
	/* drops_no_pbuf is not per-interface; it's per BE card */
714	stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
715				drvs->rx_input_fifo_overflow_drop +
716				drvs->rx_drops_no_pbuf;
717}
718
719void be_link_status_update(struct be_adapter *adapter, u8 link_status)
720{
721	struct net_device *netdev = adapter->netdev;
722
723	if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
724		netif_carrier_off(netdev);
725		adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
726	}
727
728	if (link_status)
729		netif_carrier_on(netdev);
730	else
731		netif_carrier_off(netdev);
732
733	netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
734}
735
736static int be_gso_hdr_len(struct sk_buff *skb)
737{
738	if (skb->encapsulation)
739		return skb_inner_tcp_all_headers(skb);
740
741	return skb_tcp_all_headers(skb);
742}
743
744static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
745{
746	struct be_tx_stats *stats = tx_stats(txo);
747	u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
748	/* Account for headers which get duplicated in TSO pkt */
749	u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
750
751	u64_stats_update_begin(&stats->sync);
752	stats->tx_reqs++;
753	stats->tx_bytes += skb->len + dup_hdr_len;
754	stats->tx_pkts += tx_pkts;
755	if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
756		stats->tx_vxlan_offload_pkts += tx_pkts;
757	u64_stats_update_end(&stats->sync);
758}
759
760/* Returns number of WRBs needed for the skb */
761static u32 skb_wrb_cnt(struct sk_buff *skb)
762{
763	/* +1 for the header wrb */
764	return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
765}
766
767static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
768{
769	wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
770	wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
771	wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
772	wrb->rsvd0 = 0;
773}
774
775/* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
776 * to avoid the swap and shift/mask operations in wrb_fill().
777 */
778static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
779{
780	wrb->frag_pa_hi = 0;
781	wrb->frag_pa_lo = 0;
782	wrb->frag_len = 0;
783	wrb->rsvd0 = 0;
784}
785
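/* Return the VLAN TCI to place in the Tx WRB. If the priority (PCP)
 * requested by the stack is not in the available priority bitmap
 * (vlan_prio_bmap), the PCP bits are rewritten to the recommended priority
 * while the VID is preserved.
 */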
786static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
787				     struct sk_buff *skb)
788{
789	u8 vlan_prio;
790	u16 vlan_tag;
791
792	vlan_tag = skb_vlan_tag_get(skb);
793	vlan_prio = skb_vlan_tag_get_prio(skb);
794	/* If vlan priority provided by OS is NOT in available bmap */
795	if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
796		vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
797				adapter->recommended_prio_bits;
798
799	return vlan_tag;
800}
801
802/* Used only for IP tunnel packets */
803static u16 skb_inner_ip_proto(struct sk_buff *skb)
804{
805	return (inner_ip_hdr(skb)->version == 4) ?
806		inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
807}
808
809static u16 skb_ip_proto(struct sk_buff *skb)
810{
811	return (ip_hdr(skb)->version == 4) ?
812		ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
813}
814
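/* The TXQ is treated as full once it can no longer accept a worst-case
 * (maximally fragmented) packet; it is woken only after draining to half
 * its length to avoid stop/wake thrashing.
 */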
815static inline bool be_is_txq_full(struct be_tx_obj *txo)
816{
817	return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
818}
819
820static inline bool be_can_txq_wake(struct be_tx_obj *txo)
821{
822	return atomic_read(&txo->q.used) < txo->q.len / 2;
823}
824
825static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
826{
827	return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
828}
829
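/* Translate the skb's offload requests (TSO/LSO, checksum, VLAN) into the
 * feature bits and fields of be_wrb_params, which wrb_fill_hdr() later
 * encodes into the Tx header WRB.
 */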
830static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
831				       struct sk_buff *skb,
832				       struct be_wrb_params *wrb_params)
833{
834	u16 proto;
835
836	if (skb_is_gso(skb)) {
837		BE_WRB_F_SET(wrb_params->features, LSO, 1);
838		wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
839		if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
840			BE_WRB_F_SET(wrb_params->features, LSO6, 1);
841	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
842		if (skb->encapsulation) {
843			BE_WRB_F_SET(wrb_params->features, IPCS, 1);
844			proto = skb_inner_ip_proto(skb);
845		} else {
846			proto = skb_ip_proto(skb);
847		}
848		if (proto == IPPROTO_TCP)
849			BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
850		else if (proto == IPPROTO_UDP)
851			BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
852	}
853
854	if (skb_vlan_tag_present(skb)) {
855		BE_WRB_F_SET(wrb_params->features, VLAN, 1);
856		wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
857	}
858
859	BE_WRB_F_SET(wrb_params->features, CRC, 1);
860}
861
862static void wrb_fill_hdr(struct be_adapter *adapter,
863			 struct be_eth_hdr_wrb *hdr,
864			 struct be_wrb_params *wrb_params,
865			 struct sk_buff *skb)
866{
867	memset(hdr, 0, sizeof(*hdr));
868
869	SET_TX_WRB_HDR_BITS(crc, hdr,
870			    BE_WRB_F_GET(wrb_params->features, CRC));
871	SET_TX_WRB_HDR_BITS(ipcs, hdr,
872			    BE_WRB_F_GET(wrb_params->features, IPCS));
873	SET_TX_WRB_HDR_BITS(tcpcs, hdr,
874			    BE_WRB_F_GET(wrb_params->features, TCPCS));
875	SET_TX_WRB_HDR_BITS(udpcs, hdr,
876			    BE_WRB_F_GET(wrb_params->features, UDPCS));
877
878	SET_TX_WRB_HDR_BITS(lso, hdr,
879			    BE_WRB_F_GET(wrb_params->features, LSO));
880	SET_TX_WRB_HDR_BITS(lso6, hdr,
881			    BE_WRB_F_GET(wrb_params->features, LSO6));
882	SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
883
884	/* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
885	 * hack is not needed, the evt bit is set while ringing DB.
886	 */
887	SET_TX_WRB_HDR_BITS(event, hdr,
888			    BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
889	SET_TX_WRB_HDR_BITS(vlan, hdr,
890			    BE_WRB_F_GET(wrb_params->features, VLAN));
891	SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
892
893	SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
894	SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
895	SET_TX_WRB_HDR_BITS(mgmt, hdr,
896			    BE_WRB_F_GET(wrb_params->features, OS2BMC));
897}
898
899static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
900			  bool unmap_single)
901{
902	dma_addr_t dma;
903	u32 frag_len = le32_to_cpu(wrb->frag_len);
904
906	dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
907		(u64)le32_to_cpu(wrb->frag_pa_lo);
908	if (frag_len) {
909		if (unmap_single)
910			dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
911		else
912			dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
913	}
914}
915
916/* Grab a WRB header for xmit */
917static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
918{
919	u32 head = txo->q.head;
920
921	queue_head_inc(&txo->q);
922	return head;
923}
924
925/* Set up the WRB header for xmit */
926static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
927				struct be_tx_obj *txo,
928				struct be_wrb_params *wrb_params,
929				struct sk_buff *skb, u16 head)
930{
931	u32 num_frags = skb_wrb_cnt(skb);
932	struct be_queue_info *txq = &txo->q;
933	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
934
935	wrb_fill_hdr(adapter, hdr, wrb_params, skb);
936	be_dws_cpu_to_le(hdr, sizeof(*hdr));
937
938	BUG_ON(txo->sent_skb_list[head]);
939	txo->sent_skb_list[head] = skb;
940	txo->last_req_hdr = head;
941	atomic_add(num_frags, &txq->used);
942	txo->last_req_wrb_cnt = num_frags;
943	txo->pend_wrb_cnt += num_frags;
944}
945
946/* Setup a WRB fragment (buffer descriptor) for xmit */
947static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
948				 int len)
949{
950	struct be_eth_wrb *wrb;
951	struct be_queue_info *txq = &txo->q;
952
953	wrb = queue_head_node(txq);
954	wrb_fill(wrb, busaddr, len);
955	queue_head_inc(txq);
956}
957
958/* Bring the queue back to the state it was in before be_xmit_enqueue() routine
959 * was invoked. The producer index is restored to the previous packet and the
960 * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
961 */
962static void be_xmit_restore(struct be_adapter *adapter,
963			    struct be_tx_obj *txo, u32 head, bool map_single,
964			    u32 copied)
965{
966	struct device *dev;
967	struct be_eth_wrb *wrb;
968	struct be_queue_info *txq = &txo->q;
969
970	dev = &adapter->pdev->dev;
971	txq->head = head;
972
973	/* skip the first wrb (hdr); it's not mapped */
974	queue_head_inc(txq);
975	while (copied) {
976		wrb = queue_head_node(txq);
977		unmap_tx_frag(dev, wrb, map_single);
978		map_single = false;
979		copied -= le32_to_cpu(wrb->frag_len);
980		queue_head_inc(txq);
981	}
982
983	txq->head = head;
984}
985
986/* Enqueue the given packet for transmit. This routine allocates WRBs for the
987 * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
988 * of WRBs used up by the packet.
989 */
990static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
991			   struct sk_buff *skb,
992			   struct be_wrb_params *wrb_params)
993{
994	u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
995	struct device *dev = &adapter->pdev->dev;
996	bool map_single = false;
997	u32 head;
998	dma_addr_t busaddr;
999	int len;
1000
1001	head = be_tx_get_wrb_hdr(txo);
1002
1003	if (skb->len > skb->data_len) {
1004		len = skb_headlen(skb);
1005
1006		busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1007		if (dma_mapping_error(dev, busaddr))
1008			goto dma_err;
1009		map_single = true;
1010		be_tx_setup_wrb_frag(txo, busaddr, len);
1011		copied += len;
1012	}
1013
1014	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1015		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1016		len = skb_frag_size(frag);
1017
1018		busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1019		if (dma_mapping_error(dev, busaddr))
1020			goto dma_err;
1021		be_tx_setup_wrb_frag(txo, busaddr, len);
1022		copied += len;
1023	}
1024
1025	be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1026
1027	be_tx_stats_update(txo, skb);
1028	return wrb_cnt;
1029
1030dma_err:
1031	adapter->drv_stats.dma_map_errors++;
1032	be_xmit_restore(adapter, txo, head, map_single, copied);
1033	return 0;
1034}
1035
1036static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1037{
1038	return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1039}
1040
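/* Insert the VLAN tag(s) into the packet data itself instead of relying on
 * HW tagging. Used by the pvid/qnq workarounds where HW VLAN insertion must
 * be skipped; also inserts the outer qnq VLAN when configured.
 */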
1041static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1042					     struct sk_buff *skb,
1043					     struct be_wrb_params
1044					     *wrb_params)
1045{
1046	bool insert_vlan = false;
1047	u16 vlan_tag = 0;
1048
1049	skb = skb_share_check(skb, GFP_ATOMIC);
1050	if (unlikely(!skb))
1051		return skb;
1052
1053	if (skb_vlan_tag_present(skb)) {
1054		vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1055		insert_vlan = true;
1056	}
1057
1058	if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1059		if (!insert_vlan) {
1060			vlan_tag = adapter->pvid;
1061			insert_vlan = true;
1062		}
		/* F/W workaround: setting skip_hw_vlan = 1 informs the F/W
		 * to skip VLAN insertion
		 */
1066		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1067	}
1068
1069	if (insert_vlan) {
1070		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1071						vlan_tag);
1072		if (unlikely(!skb))
1073			return skb;
1074		__vlan_hwaccel_clear_tag(skb);
1075	}
1076
1077	/* Insert the outer VLAN, if any */
1078	if (adapter->qnq_vid) {
1079		vlan_tag = adapter->qnq_vid;
1080		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1081						vlan_tag);
1082		if (unlikely(!skb))
1083			return skb;
1084		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1085	}
1086
1087	return skb;
1088}
1089
1090static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1091{
1092	struct ethhdr *eh = (struct ethhdr *)skb->data;
1093	u16 offset = ETH_HLEN;
1094
1095	if (eh->h_proto == htons(ETH_P_IPV6)) {
1096		struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1097
1098		offset += sizeof(struct ipv6hdr);
1099		if (ip6h->nexthdr != NEXTHDR_TCP &&
1100		    ip6h->nexthdr != NEXTHDR_UDP) {
1101			struct ipv6_opt_hdr *ehdr =
1102				(struct ipv6_opt_hdr *)(skb->data + offset);
1103
1104			/* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1105			if (ehdr->hdrlen == 0xff)
1106				return true;
1107		}
1108	}
1109	return false;
1110}
1111
1112static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1113{
1114	return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1115}
1116
1117static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1118{
1119	return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1120}
1121
1122static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1123						  struct sk_buff *skb,
1124						  struct be_wrb_params
1125						  *wrb_params)
1126{
1127	struct vlan_ethhdr *veh = skb_vlan_eth_hdr(skb);
1128	unsigned int eth_hdr_len;
1129	struct iphdr *ip;
1130
	/* For padded packets, BE HW modifies the tot_len field in the IP
	 * header incorrectly when the VLAN tag is inserted by HW.
	 * For padded packets, Lancer computes an incorrect checksum.
	 */
1135	eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1136						VLAN_ETH_HLEN : ETH_HLEN;
1137	if (skb->len <= 60 &&
1138	    (lancer_chip(adapter) || BE3_chip(adapter) ||
1139	     skb_vlan_tag_present(skb)) && is_ipv4_pkt(skb)) {
1140		ip = (struct iphdr *)ip_hdr(skb);
1141		if (unlikely(pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len))))
1142			goto tx_drop;
1143	}
1144
1145	/* If vlan tag is already inlined in the packet, skip HW VLAN
1146	 * tagging in pvid-tagging mode
1147	 */
1148	if (be_pvid_tagging_enabled(adapter) &&
1149	    veh->h_vlan_proto == htons(ETH_P_8021Q))
1150		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1151
	/* HW has a bug wherein it will calculate CSUM for VLAN
	 * pkts even though CSUM offload is disabled.
	 * Manually insert the VLAN in the pkt.
	 */
1156	if (skb->ip_summed != CHECKSUM_PARTIAL &&
1157	    skb_vlan_tag_present(skb)) {
1158		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1159		if (unlikely(!skb))
1160			goto err;
1161	}
1162
1163	/* HW may lockup when VLAN HW tagging is requested on
1164	 * certain ipv6 packets. Drop such pkts if the HW workaround to
1165	 * skip HW tagging is not enabled by FW.
1166	 */
1167	if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1168		     (adapter->pvid || adapter->qnq_vid) &&
1169		     !qnq_async_evt_rcvd(adapter)))
1170		goto tx_drop;
1171
1172	/* Manual VLAN tag insertion to prevent:
1173	 * ASIC lockup when the ASIC inserts VLAN tag into
1174	 * certain ipv6 packets. Insert VLAN tags in driver,
1175	 * and set event, completion, vlan bits accordingly
1176	 * in the Tx WRB.
1177	 */
1178	if (be_ipv6_tx_stall_chk(adapter, skb) &&
1179	    be_vlan_tag_tx_chk(adapter, skb)) {
1180		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1181		if (unlikely(!skb))
1182			goto err;
1183	}
1184
1185	return skb;
1186tx_drop:
1187	dev_kfree_skb_any(skb);
1188err:
1189	return NULL;
1190}
1191
1192static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1193					   struct sk_buff *skb,
1194					   struct be_wrb_params *wrb_params)
1195{
1196	int err;
1197
	/* Lancer, SH and BE3 in SRIOV mode have a bug wherein
	 * packets that are 32 bytes or less may cause a transmit stall
	 * on that port. The workaround is to pad such packets
	 * (len <= 32 bytes) to a minimum length of 36 bytes.
	 */
1203	if (skb->len <= 32) {
1204		if (skb_put_padto(skb, 36))
1205			return NULL;
1206	}
1207
1208	if (BEx_chip(adapter) || lancer_chip(adapter)) {
1209		skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1210		if (!skb)
1211			return NULL;
1212	}
1213
1214	/* The stack can send us skbs with length greater than
1215	 * what the HW can handle. Trim the extra bytes.
1216	 */
1217	WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1218	err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1219	WARN_ON(err);
1220
1221	return skb;
1222}
1223
1224static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1225{
1226	struct be_queue_info *txq = &txo->q;
1227	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1228
1229	/* Mark the last request eventable if it hasn't been marked already */
1230	if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1231		hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1232
	/* compose a dummy wrb if there is an odd number of wrbs to notify */
1234	if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1235		wrb_fill_dummy(queue_head_node(txq));
1236		queue_head_inc(txq);
1237		atomic_inc(&txq->used);
1238		txo->pend_wrb_cnt++;
1239		hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1240					   TX_HDR_WRB_NUM_SHIFT);
1241		hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1242					  TX_HDR_WRB_NUM_SHIFT);
1243	}
1244	be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1245	txo->pend_wrb_cnt = 0;
1246}
1247
1248/* OS2BMC related */
1249
1250#define DHCP_CLIENT_PORT	68
1251#define DHCP_SERVER_PORT	67
1252#define NET_BIOS_PORT1		137
1253#define NET_BIOS_PORT2		138
1254#define DHCPV6_RAS_PORT		547
1255
1256#define is_mc_allowed_on_bmc(adapter, eh)	\
1257	(!is_multicast_filt_enabled(adapter) &&	\
1258	 is_multicast_ether_addr(eh->h_dest) &&	\
1259	 !is_broadcast_ether_addr(eh->h_dest))
1260
1261#define is_bc_allowed_on_bmc(adapter, eh)	\
1262	(!is_broadcast_filt_enabled(adapter) &&	\
1263	 is_broadcast_ether_addr(eh->h_dest))
1264
1265#define is_arp_allowed_on_bmc(adapter, skb)	\
1266	(is_arp(skb) && is_arp_filt_enabled(adapter))
1267
1268#define is_arp(skb)	(skb->protocol == htons(ETH_P_ARP))
1269
1270#define is_arp_filt_enabled(adapter)	\
1271		(adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1272
1273#define is_dhcp_client_filt_enabled(adapter)	\
1274		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1275
1276#define is_dhcp_srvr_filt_enabled(adapter)	\
1277		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1278
1279#define is_nbios_filt_enabled(adapter)	\
1280		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1281
1282#define is_ipv6_na_filt_enabled(adapter)	\
1283		(adapter->bmc_filt_mask &	\
1284			BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1285
1286#define is_ipv6_ra_filt_enabled(adapter)	\
1287		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1288
1289#define is_ipv6_ras_filt_enabled(adapter)	\
1290		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1291
1292#define is_broadcast_filt_enabled(adapter)	\
1293		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1294
1295#define is_multicast_filt_enabled(adapter)	\
1296		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1297
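/* Decide whether a copy of this Tx packet must also be sent to the BMC
 * (OS2BMC), based on the packet type (broadcast/multicast, ARP, DHCP,
 * NetBIOS, IPv6 RA/NA/RAS) and the BMC filtering bits reported by the FW
 * in adapter->bmc_filt_mask.
 */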
1298static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1299			       struct sk_buff **skb)
1300{
1301	struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1302	bool os2bmc = false;
1303
1304	if (!be_is_os2bmc_enabled(adapter))
1305		goto done;
1306
1307	if (!is_multicast_ether_addr(eh->h_dest))
1308		goto done;
1309
1310	if (is_mc_allowed_on_bmc(adapter, eh) ||
1311	    is_bc_allowed_on_bmc(adapter, eh) ||
1312	    is_arp_allowed_on_bmc(adapter, (*skb))) {
1313		os2bmc = true;
1314		goto done;
1315	}
1316
1317	if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1318		struct ipv6hdr *hdr = ipv6_hdr((*skb));
1319		u8 nexthdr = hdr->nexthdr;
1320
1321		if (nexthdr == IPPROTO_ICMPV6) {
1322			struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1323
1324			switch (icmp6->icmp6_type) {
1325			case NDISC_ROUTER_ADVERTISEMENT:
1326				os2bmc = is_ipv6_ra_filt_enabled(adapter);
1327				goto done;
1328			case NDISC_NEIGHBOUR_ADVERTISEMENT:
1329				os2bmc = is_ipv6_na_filt_enabled(adapter);
1330				goto done;
1331			default:
1332				break;
1333			}
1334		}
1335	}
1336
1337	if (is_udp_pkt((*skb))) {
1338		struct udphdr *udp = udp_hdr((*skb));
1339
1340		switch (ntohs(udp->dest)) {
1341		case DHCP_CLIENT_PORT:
1342			os2bmc = is_dhcp_client_filt_enabled(adapter);
1343			goto done;
1344		case DHCP_SERVER_PORT:
1345			os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1346			goto done;
1347		case NET_BIOS_PORT1:
1348		case NET_BIOS_PORT2:
1349			os2bmc = is_nbios_filt_enabled(adapter);
1350			goto done;
1351		case DHCPV6_RAS_PORT:
1352			os2bmc = is_ipv6_ras_filt_enabled(adapter);
1353			goto done;
1354		default:
1355			break;
1356		}
1357	}
1358done:
	/* For VLAN packets destined to the BMC, the ASIC expects the
	 * VLAN tag to be inline in the packet.
	 */
1362	if (os2bmc)
1363		*skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1364
1365	return os2bmc;
1366}
1367
1368static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1369{
1370	struct be_adapter *adapter = netdev_priv(netdev);
1371	u16 q_idx = skb_get_queue_mapping(skb);
1372	struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1373	struct be_wrb_params wrb_params = { 0 };
1374	bool flush = !netdev_xmit_more();
1375	u16 wrb_cnt;
1376
1377	skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1378	if (unlikely(!skb))
1379		goto drop;
1380
1381	be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1382
1383	wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1384	if (unlikely(!wrb_cnt)) {
1385		dev_kfree_skb_any(skb);
1386		goto drop;
1387	}
1388
1389	/* if os2bmc is enabled and if the pkt is destined to bmc,
1390	 * enqueue the pkt a 2nd time with mgmt bit set.
1391	 */
1392	if (be_send_pkt_to_bmc(adapter, &skb)) {
1393		BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1394		wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1395		if (unlikely(!wrb_cnt))
1396			goto drop;
1397		else
1398			skb_get(skb);
1399	}
1400
1401	if (be_is_txq_full(txo)) {
1402		netif_stop_subqueue(netdev, q_idx);
1403		tx_stats(txo)->tx_stops++;
1404	}
1405
1406	if (flush || __netif_subqueue_stopped(netdev, q_idx))
1407		be_xmit_flush(adapter, txo);
1408
1409	return NETDEV_TX_OK;
1410drop:
1411	tx_stats(txo)->tx_drv_drops++;
1412	/* Flush the already enqueued tx requests */
1413	if (flush && txo->pend_wrb_cnt)
1414		be_xmit_flush(adapter, txo);
1415
1416	return NETDEV_TX_OK;
1417}
1418
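/* ndo_tx_timeout handler: dump the TX queue and CQ descriptors plus any
 * pending skbs to aid debugging, and on Lancer request a FW reset to
 * recover the device.
 */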
1419static void be_tx_timeout(struct net_device *netdev, unsigned int txqueue)
1420{
1421	struct be_adapter *adapter = netdev_priv(netdev);
1422	struct device *dev = &adapter->pdev->dev;
1423	struct be_tx_obj *txo;
1424	struct sk_buff *skb;
1425	struct tcphdr *tcphdr;
1426	struct udphdr *udphdr;
1427	u32 *entry;
1428	int status;
1429	int i, j;
1430
1431	for_all_tx_queues(adapter, txo, i) {
1432		dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n",
1433			 i, txo->q.head, txo->q.tail,
1434			 atomic_read(&txo->q.used), txo->q.id);
1435
1436		entry = txo->q.dma_mem.va;
1437		for (j = 0; j < TX_Q_LEN * 4; j += 4) {
1438			if (entry[j] != 0 || entry[j + 1] != 0 ||
1439			    entry[j + 2] != 0 || entry[j + 3] != 0) {
1440				dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1441					 j, entry[j], entry[j + 1],
1442					 entry[j + 2], entry[j + 3]);
1443			}
1444		}
1445
1446		entry = txo->cq.dma_mem.va;
1447		dev_info(dev, "TXCQ Dump: %d  H: %d T: %d used: %d\n",
1448			 i, txo->cq.head, txo->cq.tail,
1449			 atomic_read(&txo->cq.used));
1450		for (j = 0; j < TX_CQ_LEN * 4; j += 4) {
1451			if (entry[j] != 0 || entry[j + 1] != 0 ||
1452			    entry[j + 2] != 0 || entry[j + 3] != 0) {
1453				dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1454					 j, entry[j], entry[j + 1],
1455					 entry[j + 2], entry[j + 3]);
1456			}
1457		}
1458
1459		for (j = 0; j < TX_Q_LEN; j++) {
1460			if (txo->sent_skb_list[j]) {
1461				skb = txo->sent_skb_list[j];
1462				if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
1463					tcphdr = tcp_hdr(skb);
1464					dev_info(dev, "TCP source port %d\n",
1465						 ntohs(tcphdr->source));
1466					dev_info(dev, "TCP dest port %d\n",
1467						 ntohs(tcphdr->dest));
					dev_info(dev, "TCP sequence num %u\n",
						 ntohl(tcphdr->seq));
					dev_info(dev, "TCP ack_seq %u\n",
						 ntohl(tcphdr->ack_seq));
1472				} else if (ip_hdr(skb)->protocol ==
1473					   IPPROTO_UDP) {
1474					udphdr = udp_hdr(skb);
1475					dev_info(dev, "UDP source port %d\n",
1476						 ntohs(udphdr->source));
1477					dev_info(dev, "UDP dest port %d\n",
1478						 ntohs(udphdr->dest));
1479				}
1480				dev_info(dev, "skb[%d] %p len %d proto 0x%x\n",
1481					 j, skb, skb->len, skb->protocol);
1482			}
1483		}
1484	}
1485
1486	if (lancer_chip(adapter)) {
1487		dev_info(dev, "Initiating reset due to tx timeout\n");
1488		dev_info(dev, "Resetting adapter\n");
1489		status = lancer_physdev_ctrl(adapter,
1490					     PHYSDEV_CONTROL_FW_RESET_MASK);
1491		if (status)
1492			dev_err(dev, "Reset failed .. Reboot server\n");
1493	}
1494}
1495
1496static inline bool be_in_all_promisc(struct be_adapter *adapter)
1497{
1498	return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1499			BE_IF_FLAGS_ALL_PROMISCUOUS;
1500}
1501
1502static int be_set_vlan_promisc(struct be_adapter *adapter)
1503{
1504	struct device *dev = &adapter->pdev->dev;
1505	int status;
1506
1507	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1508		return 0;
1509
1510	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1511	if (!status) {
1512		dev_info(dev, "Enabled VLAN promiscuous mode\n");
1513		adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1514	} else {
1515		dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1516	}
1517	return status;
1518}
1519
1520static int be_clear_vlan_promisc(struct be_adapter *adapter)
1521{
1522	struct device *dev = &adapter->pdev->dev;
1523	int status;
1524
1525	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1526	if (!status) {
1527		dev_info(dev, "Disabling VLAN promiscuous mode\n");
1528		adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1529	}
1530	return status;
1531}
1532
1533/*
1534 * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1535 * If the user configures more, place BE in vlan promiscuous mode.
1536 */
1537static int be_vid_config(struct be_adapter *adapter)
1538{
1539	struct device *dev = &adapter->pdev->dev;
1540	u16 vids[BE_NUM_VLANS_SUPPORTED];
1541	u16 num = 0, i = 0;
1542	int status = 0;
1543
1544	/* No need to change the VLAN state if the I/F is in promiscuous */
1545	if (adapter->netdev->flags & IFF_PROMISC)
1546		return 0;
1547
1548	if (adapter->vlans_added > be_max_vlans(adapter))
1549		return be_set_vlan_promisc(adapter);
1550
1551	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1552		status = be_clear_vlan_promisc(adapter);
1553		if (status)
1554			return status;
1555	}
1556	/* Construct VLAN Table to give to HW */
1557	for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1558		vids[num++] = cpu_to_le16(i);
1559
1560	status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1561	if (status) {
1562		dev_err(dev, "Setting HW VLAN filtering failed\n");
1563		/* Set to VLAN promisc mode as setting VLAN filter failed */
1564		if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1565		    addl_status(status) ==
1566				MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1567			return be_set_vlan_promisc(adapter);
1568	}
1569	return status;
1570}
1571
1572static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1573{
1574	struct be_adapter *adapter = netdev_priv(netdev);
1575	int status = 0;
1576
1577	mutex_lock(&adapter->rx_filter_lock);
1578
1579	/* Packets with VID 0 are always received by Lancer by default */
1580	if (lancer_chip(adapter) && vid == 0)
1581		goto done;
1582
1583	if (test_bit(vid, adapter->vids))
1584		goto done;
1585
1586	set_bit(vid, adapter->vids);
1587	adapter->vlans_added++;
1588
1589	status = be_vid_config(adapter);
1590done:
1591	mutex_unlock(&adapter->rx_filter_lock);
1592	return status;
1593}
1594
1595static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1596{
1597	struct be_adapter *adapter = netdev_priv(netdev);
1598	int status = 0;
1599
1600	mutex_lock(&adapter->rx_filter_lock);
1601
1602	/* Packets with VID 0 are always received by Lancer by default */
1603	if (lancer_chip(adapter) && vid == 0)
1604		goto done;
1605
1606	if (!test_bit(vid, adapter->vids))
1607		goto done;
1608
1609	clear_bit(vid, adapter->vids);
1610	adapter->vlans_added--;
1611
1612	status = be_vid_config(adapter);
1613done:
1614	mutex_unlock(&adapter->rx_filter_lock);
1615	return status;
1616}
1617
1618static void be_set_all_promisc(struct be_adapter *adapter)
1619{
1620	be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1621	adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1622}
1623
1624static void be_set_mc_promisc(struct be_adapter *adapter)
1625{
1626	int status;
1627
1628	if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1629		return;
1630
1631	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1632	if (!status)
1633		adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1634}
1635
1636static void be_set_uc_promisc(struct be_adapter *adapter)
1637{
1638	int status;
1639
1640	if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1641		return;
1642
1643	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1644	if (!status)
1645		adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1646}
1647
1648static void be_clear_uc_promisc(struct be_adapter *adapter)
1649{
1650	int status;
1651
1652	if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1653		return;
1654
1655	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1656	if (!status)
1657		adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1658}
1659
1660/* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync().
1661 * We use a single callback function for both sync and unsync. We really don't
1662 * add/remove addresses through this callback. But, we use it to detect changes
1663 * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1664 */
1665static int be_uc_list_update(struct net_device *netdev,
1666			     const unsigned char *addr)
1667{
1668	struct be_adapter *adapter = netdev_priv(netdev);
1669
1670	adapter->update_uc_list = true;
1671	return 0;
1672}
1673
1674static int be_mc_list_update(struct net_device *netdev,
1675			     const unsigned char *addr)
1676{
1677	struct be_adapter *adapter = netdev_priv(netdev);
1678
1679	adapter->update_mc_list = true;
1680	return 0;
1681}
1682
1683static void be_set_mc_list(struct be_adapter *adapter)
1684{
1685	struct net_device *netdev = adapter->netdev;
1686	struct netdev_hw_addr *ha;
1687	bool mc_promisc = false;
1688	int status;
1689
1690	netif_addr_lock_bh(netdev);
1691	__dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1692
1693	if (netdev->flags & IFF_PROMISC) {
1694		adapter->update_mc_list = false;
1695	} else if (netdev->flags & IFF_ALLMULTI ||
1696		   netdev_mc_count(netdev) > be_max_mc(adapter)) {
1697		/* Enable multicast promisc if num configured exceeds
1698		 * what we support
1699		 */
1700		mc_promisc = true;
1701		adapter->update_mc_list = false;
1702	} else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1703		/* Update mc-list unconditionally if the iface was previously
1704		 * in mc-promisc mode and now is out of that mode.
1705		 */
1706		adapter->update_mc_list = true;
1707	}
1708
1709	if (adapter->update_mc_list) {
1710		int i = 0;
1711
1712		/* cache the mc-list in adapter */
1713		netdev_for_each_mc_addr(ha, netdev) {
1714			ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1715			i++;
1716		}
1717		adapter->mc_count = netdev_mc_count(netdev);
1718	}
1719	netif_addr_unlock_bh(netdev);
1720
1721	if (mc_promisc) {
1722		be_set_mc_promisc(adapter);
1723	} else if (adapter->update_mc_list) {
1724		status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1725		if (!status)
1726			adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1727		else
1728			be_set_mc_promisc(adapter);
1729
1730		adapter->update_mc_list = false;
1731	}
1732}
1733
1734static void be_clear_mc_list(struct be_adapter *adapter)
1735{
1736	struct net_device *netdev = adapter->netdev;
1737
1738	__dev_mc_unsync(netdev, NULL);
1739	be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1740	adapter->mc_count = 0;
1741}
1742
1743static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1744{
1745	if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1746		adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1747		return 0;
1748	}
1749
1750	return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1751			       adapter->if_handle,
1752			       &adapter->pmac_id[uc_idx + 1], 0);
1753}
1754
1755static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1756{
1757	if (pmac_id == adapter->pmac_id[0])
1758		return;
1759
1760	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1761}
1762
1763static void be_set_uc_list(struct be_adapter *adapter)
1764{
1765	struct net_device *netdev = adapter->netdev;
1766	struct netdev_hw_addr *ha;
1767	bool uc_promisc = false;
1768	int curr_uc_macs = 0, i;
1769
1770	netif_addr_lock_bh(netdev);
1771	__dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1772
1773	if (netdev->flags & IFF_PROMISC) {
1774		adapter->update_uc_list = false;
1775	} else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1776		uc_promisc = true;
1777		adapter->update_uc_list = false;
1778	} else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1779		/* Update uc-list unconditionally if the iface was previously
1780		 * in uc-promisc mode and now is out of that mode.
1781		 */
1782		adapter->update_uc_list = true;
1783	}
1784
1785	if (adapter->update_uc_list) {
1786		/* cache the uc-list in adapter array */
1787		i = 0;
1788		netdev_for_each_uc_addr(ha, netdev) {
1789			ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1790			i++;
1791		}
1792		curr_uc_macs = netdev_uc_count(netdev);
1793	}
1794	netif_addr_unlock_bh(netdev);
1795
1796	if (uc_promisc) {
1797		be_set_uc_promisc(adapter);
1798	} else if (adapter->update_uc_list) {
1799		be_clear_uc_promisc(adapter);
1800
1801		for (i = 0; i < adapter->uc_macs; i++)
1802			be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1803
1804		for (i = 0; i < curr_uc_macs; i++)
1805			be_uc_mac_add(adapter, i);
1806		adapter->uc_macs = curr_uc_macs;
1807		adapter->update_uc_list = false;
1808	}
1809}
1810
1811static void be_clear_uc_list(struct be_adapter *adapter)
1812{
1813	struct net_device *netdev = adapter->netdev;
1814	int i;
1815
1816	__dev_uc_unsync(netdev, NULL);
1817	for (i = 0; i < adapter->uc_macs; i++)
1818		be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1819
1820	adapter->uc_macs = 0;
1821}
1822
1823static void __be_set_rx_mode(struct be_adapter *adapter)
1824{
1825	struct net_device *netdev = adapter->netdev;
1826
1827	mutex_lock(&adapter->rx_filter_lock);
1828
1829	if (netdev->flags & IFF_PROMISC) {
1830		if (!be_in_all_promisc(adapter))
1831			be_set_all_promisc(adapter);
1832	} else if (be_in_all_promisc(adapter)) {
1833		/* We need to re-program the vlan-list or clear
1834		 * vlan-promisc mode (if needed) when the interface
1835		 * comes out of promisc mode.
1836		 */
1837		be_vid_config(adapter);
1838	}
1839
1840	be_set_uc_list(adapter);
1841	be_set_mc_list(adapter);
1842
1843	mutex_unlock(&adapter->rx_filter_lock);
1844}
1845
1846static void be_work_set_rx_mode(struct work_struct *work)
1847{
1848	struct be_cmd_work *cmd_work =
1849				container_of(work, struct be_cmd_work, work);
1850
1851	__be_set_rx_mode(cmd_work->adapter);
1852	kfree(cmd_work);
1853}
1854
1855static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1856{
1857	struct be_adapter *adapter = netdev_priv(netdev);
1858	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1859	int status;
1860
1861	if (!sriov_enabled(adapter))
1862		return -EPERM;
1863
1864	if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1865		return -EINVAL;
1866
1867	/* Proceed further only if the user-provided MAC is different
1868	 * from the active MAC
1869	 */
1870	if (ether_addr_equal(mac, vf_cfg->mac_addr))
1871		return 0;
1872
1873	if (BEx_chip(adapter)) {
1874		be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1875				vf + 1);
1876
1877		status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1878					 &vf_cfg->pmac_id, vf + 1);
1879	} else {
1880		status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1881					vf + 1);
1882	}
1883
1884	if (status) {
1885		dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d failed: %#x\n",
1886			mac, vf, status);
1887		return be_cmd_status(status);
1888	}
1889
1890	ether_addr_copy(vf_cfg->mac_addr, mac);
1891
1892	return 0;
1893}
1894
1895static int be_get_vf_config(struct net_device *netdev, int vf,
1896			    struct ifla_vf_info *vi)
1897{
1898	struct be_adapter *adapter = netdev_priv(netdev);
1899	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1900
1901	if (!sriov_enabled(adapter))
1902		return -EPERM;
1903
1904	if (vf >= adapter->num_vfs)
1905		return -EINVAL;
1906
1907	vi->vf = vf;
1908	vi->max_tx_rate = vf_cfg->tx_rate;
1909	vi->min_tx_rate = 0;
1910	vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1911	vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1912	memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1913	vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1914	vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1915
1916	return 0;
1917}
1918
1919static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1920{
1921	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1922	u16 vids[BE_NUM_VLANS_SUPPORTED];
1923	int vf_if_id = vf_cfg->if_handle;
1924	int status;
1925
1926	/* Enable Transparent VLAN Tagging */
1927	status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1928	if (status)
1929		return status;
1930
1931	/* With TVT enabled, clear any pre-programmed VLAN filters on the VF */
1932	vids[0] = 0;
1933	status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1934	if (!status)
1935		dev_info(&adapter->pdev->dev,
1936			 "Cleared guest VLANs on VF%d\n", vf);
1937
1938	/* After TVT is enabled, disallow VFs to program VLAN filters */
1939	if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1940		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1941						  ~BE_PRIV_FILTMGMT, vf + 1);
1942		if (!status)
1943			vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1944	}
1945	return 0;
1946}
1947
1948static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1949{
1950	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1951	struct device *dev = &adapter->pdev->dev;
1952	int status;
1953
1954	/* Reset Transparent VLAN Tagging. */
1955	status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1956				       vf_cfg->if_handle, 0, 0);
1957	if (status)
1958		return status;
1959
1960	/* Allow VFs to program VLAN filtering */
1961	if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1962		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1963						  BE_PRIV_FILTMGMT, vf + 1);
1964		if (!status) {
1965			vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1966			dev_info(dev, "VF%d: FILTMGMT priv enabled\n", vf);
1967		}
1968	}
1969
1970	dev_info(dev,
1971		 "Disable/re-enable i/f in VM to clear Transparent VLAN tag\n");
1972	return 0;
1973}
1974
1975static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1976			  __be16 vlan_proto)
1977{
1978	struct be_adapter *adapter = netdev_priv(netdev);
1979	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1980	int status;
1981
1982	if (!sriov_enabled(adapter))
1983		return -EPERM;
1984
1985	if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1986		return -EINVAL;
1987
1988	if (vlan_proto != htons(ETH_P_8021Q))
1989		return -EPROTONOSUPPORT;
1990
1991	if (vlan || qos) {
1992		vlan |= qos << VLAN_PRIO_SHIFT;
1993		status = be_set_vf_tvt(adapter, vf, vlan);
1994	} else {
1995		status = be_clear_vf_tvt(adapter, vf);
1996	}
1997
1998	if (status) {
1999		dev_err(&adapter->pdev->dev,
2000			"VLAN %d config on VF %d failed : %#x\n", vlan, vf,
2001			status);
2002		return be_cmd_status(status);
2003	}
2004
2005	vf_cfg->vlan_tag = vlan;
2006	return 0;
2007}
2008
2009static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
2010			     int min_tx_rate, int max_tx_rate)
2011{
2012	struct be_adapter *adapter = netdev_priv(netdev);
2013	struct device *dev = &adapter->pdev->dev;
2014	int percent_rate, status = 0;
2015	u16 link_speed = 0;
2016	u8 link_status;
2017
2018	if (!sriov_enabled(adapter))
2019		return -EPERM;
2020
2021	if (vf >= adapter->num_vfs)
2022		return -EINVAL;
2023
2024	if (min_tx_rate)
2025		return -EINVAL;
2026
2027	if (!max_tx_rate)
2028		goto config_qos;
2029
2030	status = be_cmd_link_status_query(adapter, &link_speed,
2031					  &link_status, 0);
2032	if (status)
2033		goto err;
2034
2035	if (!link_status) {
2036		dev_err(dev, "TX-rate setting not allowed when link is down\n");
2037		status = -ENETDOWN;
2038		goto err;
2039	}
2040
2041	if (max_tx_rate < 100 || max_tx_rate > link_speed) {
2042		dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
2043			link_speed);
2044		status = -EINVAL;
2045		goto err;
2046	}
2047
2048	/* On Skyhawk, the QoS setting must be specified only as a % of link speed */
2049	percent_rate = link_speed / 100;
2050	if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
2051		dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
2052			percent_rate);
2053		status = -EINVAL;
2054		goto err;
2055	}
2056
2057config_qos:
2058	status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
2059	if (status)
2060		goto err;
2061
2062	adapter->vf_cfg[vf].tx_rate = max_tx_rate;
2063	return 0;
2064
2065err:
2066	dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2067		max_tx_rate, vf);
2068	return be_cmd_status(status);
2069}
2070
2071static int be_set_vf_link_state(struct net_device *netdev, int vf,
2072				int link_state)
2073{
2074	struct be_adapter *adapter = netdev_priv(netdev);
2075	int status;
2076
2077	if (!sriov_enabled(adapter))
2078		return -EPERM;
2079
2080	if (vf >= adapter->num_vfs)
2081		return -EINVAL;
2082
2083	status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2084	if (status) {
2085		dev_err(&adapter->pdev->dev,
2086			"Link state change on VF %d failed: %#x\n", vf, status);
2087		return be_cmd_status(status);
2088	}
2089
2090	adapter->vf_cfg[vf].plink_tracking = link_state;
2091
2092	return 0;
2093}
2094
2095static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2096{
2097	struct be_adapter *adapter = netdev_priv(netdev);
2098	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2099	u8 spoofchk;
2100	int status;
2101
2102	if (!sriov_enabled(adapter))
2103		return -EPERM;
2104
2105	if (vf >= adapter->num_vfs)
2106		return -EINVAL;
2107
2108	if (BEx_chip(adapter))
2109		return -EOPNOTSUPP;
2110
2111	if (enable == vf_cfg->spoofchk)
2112		return 0;
2113
2114	spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2115
2116	status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2117				       0, spoofchk);
2118	if (status) {
2119		dev_err(&adapter->pdev->dev,
2120			"Spoofchk change on VF %d failed: %#x\n", vf, status);
2121		return be_cmd_status(status);
2122	}
2123
2124	vf_cfg->spoofchk = enable;
2125	return 0;
2126}
2127
2128static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2129			  ulong now)
2130{
2131	aic->rx_pkts_prev = rx_pkts;
2132	aic->tx_reqs_prev = tx_pkts;
2133	aic->jiffies = now;
2134}
2135
2136static int be_get_new_eqd(struct be_eq_obj *eqo)
2137{
2138	struct be_adapter *adapter = eqo->adapter;
2139	int eqd, start;
2140	struct be_aic_obj *aic;
2141	struct be_rx_obj *rxo;
2142	struct be_tx_obj *txo;
2143	u64 rx_pkts = 0, tx_pkts = 0;
2144	ulong now;
2145	u32 pps, delta;
2146	int i;
2147
2148	aic = &adapter->aic_obj[eqo->idx];
2149	if (!adapter->aic_enabled) {
2150		if (aic->jiffies)
2151			aic->jiffies = 0;
2152		eqd = aic->et_eqd;
2153		return eqd;
2154	}
2155
2156	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2157		do {
2158			start = u64_stats_fetch_begin(&rxo->stats.sync);
2159			rx_pkts += rxo->stats.rx_pkts;
2160		} while (u64_stats_fetch_retry(&rxo->stats.sync, start));
2161	}
2162
2163	for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2164		do {
2165			start = u64_stats_fetch_begin(&txo->stats.sync);
2166			tx_pkts += txo->stats.tx_reqs;
2167		} while (u64_stats_fetch_retry(&txo->stats.sync, start));
2168	}
2169
2170	/* Skip if the counters wrapped around or this is the first calculation */
2171	now = jiffies;
2172	if (!aic->jiffies || time_before(now, aic->jiffies) ||
2173	    rx_pkts < aic->rx_pkts_prev ||
2174	    tx_pkts < aic->tx_reqs_prev) {
2175		be_aic_update(aic, rx_pkts, tx_pkts, now);
2176		return aic->prev_eqd;
2177	}
2178
2179	delta = jiffies_to_msecs(now - aic->jiffies);
2180	if (delta == 0)
2181		return aic->prev_eqd;
2182
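	/* Aggregate rx+tx pkt rate (pkts/sec) since the last sample and derive
	 * a new EQ delay from it: roughly 4 units per 15K pkts/sec, zeroed at
	 * very low rates and clamped to the per-EQ min/max EQD values.
	 */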
2183	pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2184		(((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2185	eqd = (pps / 15000) << 2;
2186
2187	if (eqd < 8)
2188		eqd = 0;
2189	eqd = min_t(u32, eqd, aic->max_eqd);
2190	eqd = max_t(u32, eqd, aic->min_eqd);
2191
2192	be_aic_update(aic, rx_pkts, tx_pkts, now);
2193
2194	return eqd;
2195}
2196
2197/* For Skyhawk-R only */
2198static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2199{
2200	struct be_adapter *adapter = eqo->adapter;
2201	struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2202	ulong now = jiffies;
2203	int eqd;
2204	u32 mult_enc;
2205
2206	if (!adapter->aic_enabled)
2207		return 0;
2208
2209	if (jiffies_to_msecs(now - aic->jiffies) < 1)
2210		eqd = aic->prev_eqd;
2211	else
2212		eqd = be_get_new_eqd(eqo);
2213
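	/* Map the computed delay onto one of the Skyhawk R2I delay-multiplier
	 * encodings used when re-arming the EQ doorbell.
	 */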
2214	if (eqd > 100)
2215		mult_enc = R2I_DLY_ENC_1;
2216	else if (eqd > 60)
2217		mult_enc = R2I_DLY_ENC_2;
2218	else if (eqd > 20)
2219		mult_enc = R2I_DLY_ENC_3;
2220	else
2221		mult_enc = R2I_DLY_ENC_0;
2222
2223	aic->prev_eqd = eqd;
2224
2225	return mult_enc;
2226}
2227
2228void be_eqd_update(struct be_adapter *adapter, bool force_update)
2229{
2230	struct be_set_eqd set_eqd[MAX_EVT_QS];
2231	struct be_aic_obj *aic;
2232	struct be_eq_obj *eqo;
2233	int i, num = 0, eqd;
2234
2235	for_all_evt_queues(adapter, eqo, i) {
2236		aic = &adapter->aic_obj[eqo->idx];
2237		eqd = be_get_new_eqd(eqo);
2238		if (force_update || eqd != aic->prev_eqd) {
2239			set_eqd[num].delay_multiplier = (eqd * 65)/100;
2240			set_eqd[num].eq_id = eqo->q.id;
2241			aic->prev_eqd = eqd;
2242			num++;
2243		}
2244	}
2245
2246	if (num)
2247		be_cmd_modify_eqd(adapter, set_eqd, num);
2248}
2249
2250static void be_rx_stats_update(struct be_rx_obj *rxo,
2251			       struct be_rx_compl_info *rxcp)
2252{
2253	struct be_rx_stats *stats = rx_stats(rxo);
2254
2255	u64_stats_update_begin(&stats->sync);
2256	stats->rx_compl++;
2257	stats->rx_bytes += rxcp->pkt_size;
2258	stats->rx_pkts++;
2259	if (rxcp->tunneled)
2260		stats->rx_vxlan_offload_pkts++;
2261	if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2262		stats->rx_mcast_pkts++;
2263	if (rxcp->err)
2264		stats->rx_compl_err++;
2265	u64_stats_update_end(&stats->sync);
2266}
2267
2268static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2269{
2270	/* L4 checksum is not reliable for non-TCP/UDP packets.
2271	 * Also ignore ipcksm for IPv6 pkts
2272	 */
2273	return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2274		(rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2275}
2276
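/* Pop the page_info at the RXQ tail. The big page backing the frags is
 * DMA-unmapped only when its last fragment is consumed; other frags are just
 * synced for CPU access.
 */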
2277static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2278{
2279	struct be_adapter *adapter = rxo->adapter;
2280	struct be_rx_page_info *rx_page_info;
2281	struct be_queue_info *rxq = &rxo->q;
2282	u32 frag_idx = rxq->tail;
2283
2284	rx_page_info = &rxo->page_info_tbl[frag_idx];
2285	BUG_ON(!rx_page_info->page);
2286
2287	if (rx_page_info->last_frag) {
2288		dma_unmap_page(&adapter->pdev->dev,
2289			       dma_unmap_addr(rx_page_info, bus),
2290			       adapter->big_page_size, DMA_FROM_DEVICE);
2291		rx_page_info->last_frag = false;
2292	} else {
2293		dma_sync_single_for_cpu(&adapter->pdev->dev,
2294					dma_unmap_addr(rx_page_info, bus),
2295					rx_frag_size, DMA_FROM_DEVICE);
2296	}
2297
2298	queue_tail_inc(rxq);
2299	atomic_dec(&rxq->used);
2300	return rx_page_info;
2301}
2302
2303/* Throw away the data in the Rx completion */
2304static void be_rx_compl_discard(struct be_rx_obj *rxo,
2305				struct be_rx_compl_info *rxcp)
2306{
2307	struct be_rx_page_info *page_info;
2308	u16 i, num_rcvd = rxcp->num_rcvd;
2309
2310	for (i = 0; i < num_rcvd; i++) {
2311		page_info = get_rx_page_info(rxo);
2312		put_page(page_info->page);
2313		memset(page_info, 0, sizeof(*page_info));
2314	}
2315}
2316
2317/*
2318 * skb_fill_rx_data forms a complete skb for an ether frame
2319 * indicated by rxcp.
2320 */
2321static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2322			     struct be_rx_compl_info *rxcp)
2323{
2324	struct be_rx_page_info *page_info;
2325	u16 i, j;
2326	u16 hdr_len, curr_frag_len, remaining;
2327	u8 *start;
2328
2329	page_info = get_rx_page_info(rxo);
2330	start = page_address(page_info->page) + page_info->page_offset;
2331	prefetch(start);
2332
2333	/* Copy data in the first descriptor of this completion */
2334	curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2335
2336	skb->len = curr_frag_len;
2337	if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2338		memcpy(skb->data, start, curr_frag_len);
2339		/* Complete packet has now been moved to data */
2340		put_page(page_info->page);
2341		skb->data_len = 0;
2342		skb->tail += curr_frag_len;
2343	} else {
2344		hdr_len = ETH_HLEN;
2345		memcpy(skb->data, start, hdr_len);
2346		skb_shinfo(skb)->nr_frags = 1;
2347		skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[0],
2348					page_info->page,
2349					page_info->page_offset + hdr_len,
2350					curr_frag_len - hdr_len);
2351		skb->data_len = curr_frag_len - hdr_len;
2352		skb->truesize += rx_frag_size;
2353		skb->tail += hdr_len;
2354	}
2355	page_info->page = NULL;
2356
2357	if (rxcp->pkt_size <= rx_frag_size) {
2358		BUG_ON(rxcp->num_rcvd != 1);
2359		return;
2360	}
2361
2362	/* More frags present for this completion */
2363	remaining = rxcp->pkt_size - curr_frag_len;
2364	for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2365		page_info = get_rx_page_info(rxo);
2366		curr_frag_len = min(remaining, rx_frag_size);
2367
2368		/* Coalesce all frags from the same physical page in one slot */
2369		if (page_info->page_offset == 0) {
2370			/* Fresh page */
2371			j++;
2372			skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[j],
2373						page_info->page,
2374						page_info->page_offset,
2375						curr_frag_len);
2376			skb_shinfo(skb)->nr_frags++;
2377		} else {
2378			put_page(page_info->page);
2379			skb_frag_size_add(&skb_shinfo(skb)->frags[j],
2380					  curr_frag_len);
2381		}
2382
2383		skb->len += curr_frag_len;
2384		skb->data_len += curr_frag_len;
2385		skb->truesize += rx_frag_size;
2386		remaining -= curr_frag_len;
2387		page_info->page = NULL;
2388	}
2389	BUG_ON(j > MAX_SKB_FRAGS);
2390}
2391
2392/* Process the RX completion indicated by rxcp when GRO is disabled */
2393static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2394				struct be_rx_compl_info *rxcp)
2395{
2396	struct be_adapter *adapter = rxo->adapter;
2397	struct net_device *netdev = adapter->netdev;
2398	struct sk_buff *skb;
2399
2400	skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2401	if (unlikely(!skb)) {
2402		rx_stats(rxo)->rx_drops_no_skbs++;
2403		be_rx_compl_discard(rxo, rxcp);
2404		return;
2405	}
2406
2407	skb_fill_rx_data(rxo, skb, rxcp);
2408
2409	if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2410		skb->ip_summed = CHECKSUM_UNNECESSARY;
2411	else
2412		skb_checksum_none_assert(skb);
2413
2414	skb->protocol = eth_type_trans(skb, netdev);
2415	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2416	if (netdev->features & NETIF_F_RXHASH)
2417		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2418
2419	skb->csum_level = rxcp->tunneled;
2420	skb_mark_napi_id(skb, napi);
2421
2422	if (rxcp->vlanf)
2423		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2424
2425	netif_receive_skb(skb);
2426}
2427
2428/* Process the RX completion indicated by rxcp when GRO is enabled */
2429static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2430				    struct napi_struct *napi,
2431				    struct be_rx_compl_info *rxcp)
2432{
2433	struct be_adapter *adapter = rxo->adapter;
2434	struct be_rx_page_info *page_info;
2435	struct sk_buff *skb = NULL;
2436	u16 remaining, curr_frag_len;
2437	u16 i, j;
2438
2439	skb = napi_get_frags(napi);
2440	if (!skb) {
2441		be_rx_compl_discard(rxo, rxcp);
2442		return;
2443	}
2444
2445	remaining = rxcp->pkt_size;
2446	for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2447		page_info = get_rx_page_info(rxo);
2448
2449		curr_frag_len = min(remaining, rx_frag_size);
2450
2451		/* Coalesce all frags from the same physical page in one slot */
2452		if (i == 0 || page_info->page_offset == 0) {
2453			/* First frag or Fresh page */
2454			j++;
2455			skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[j],
2456						page_info->page,
2457						page_info->page_offset,
2458						curr_frag_len);
2459		} else {
2460			put_page(page_info->page);
2461			skb_frag_size_add(&skb_shinfo(skb)->frags[j],
2462					  curr_frag_len);
2463		}
2464
2465		skb->truesize += rx_frag_size;
2466		remaining -= curr_frag_len;
2467		memset(page_info, 0, sizeof(*page_info));
2468	}
2469	BUG_ON(j > MAX_SKB_FRAGS);
2470
2471	skb_shinfo(skb)->nr_frags = j + 1;
2472	skb->len = rxcp->pkt_size;
2473	skb->data_len = rxcp->pkt_size;
2474	skb->ip_summed = CHECKSUM_UNNECESSARY;
2475	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2476	if (adapter->netdev->features & NETIF_F_RXHASH)
2477		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2478
2479	skb->csum_level = rxcp->tunneled;
2480
2481	if (rxcp->vlanf)
2482		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2483
2484	napi_gro_frags(napi);
2485}
2486
2487static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2488				 struct be_rx_compl_info *rxcp)
2489{
2490	rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2491	rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2492	rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2493	rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2494	rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2495	rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2496	rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2497	rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2498	rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2499	rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2500	rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2501	if (rxcp->vlanf) {
2502		rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2503		rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2504	}
2505	rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2506	rxcp->tunneled =
2507		GET_RX_COMPL_V1_BITS(tunneled, compl);
2508}
2509
2510static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2511				 struct be_rx_compl_info *rxcp)
2512{
2513	rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2514	rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2515	rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2516	rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2517	rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2518	rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2519	rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2520	rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2521	rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2522	rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2523	rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2524	if (rxcp->vlanf) {
2525		rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2526		rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2527	}
2528	rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2529	rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2530}
2531
2532static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2533{
2534	struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2535	struct be_rx_compl_info *rxcp = &rxo->rxcp;
2536	struct be_adapter *adapter = rxo->adapter;
2537
2538	/* For checking the valid bit it is OK to use either definition as the
2539	 * valid bit is at the same position in both v0 and v1 Rx compl */
2540	if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2541		return NULL;
2542
2543	rmb();
2544	be_dws_le_to_cpu(compl, sizeof(*compl));
2545
2546	if (adapter->be3_native)
2547		be_parse_rx_compl_v1(compl, rxcp);
2548	else
2549		be_parse_rx_compl_v0(compl, rxcp);
2550
2551	if (rxcp->ip_frag)
2552		rxcp->l4_csum = 0;
2553
2554	if (rxcp->vlanf) {
2555		/* In QNQ modes, if qnq bit is not set, then the packet was
2556		 * tagged only with the transparent outer vlan-tag and must
2557		 * not be treated as a vlan packet by host
2558		 */
2559		if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2560			rxcp->vlanf = 0;
2561
2562		if (!lancer_chip(adapter))
2563			rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2564
2565		if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2566		    !test_bit(rxcp->vlan_tag, adapter->vids))
2567			rxcp->vlanf = 0;
2568	}
2569
2570	/* As the compl has been parsed, reset it; we won't touch it again */
2571	compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2572
2573	queue_tail_inc(&rxo->cq);
2574	return rxcp;
2575}
2576
2577static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2578{
2579	u32 order = get_order(size);
2580
2581	if (order > 0)
2582		gfp |= __GFP_COMP;
2583	return alloc_pages(gfp, order);
2584}
2585
2586/*
2587 * Allocate a page, split it into fragments of size rx_frag_size and post
2588 * them as receive buffers to BE
2589 */
2590static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2591{
2592	struct be_adapter *adapter = rxo->adapter;
2593	struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2594	struct be_queue_info *rxq = &rxo->q;
2595	struct page *pagep = NULL;
2596	struct device *dev = &adapter->pdev->dev;
2597	struct be_eth_rx_d *rxd;
2598	u64 page_dmaaddr = 0, frag_dmaaddr;
2599	u32 posted, page_offset = 0, notify = 0;
2600
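	/* Each big page is DMA-mapped once and carved into rx_frag_size chunks;
	 * the chunk that fills up the page is flagged last_frag so that
	 * get_rx_page_info() knows when to unmap the whole page.
	 */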
2601	page_info = &rxo->page_info_tbl[rxq->head];
2602	for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2603		if (!pagep) {
2604			pagep = be_alloc_pages(adapter->big_page_size, gfp);
2605			if (unlikely(!pagep)) {
2606				rx_stats(rxo)->rx_post_fail++;
2607				break;
2608			}
2609			page_dmaaddr = dma_map_page(dev, pagep, 0,
2610						    adapter->big_page_size,
2611						    DMA_FROM_DEVICE);
2612			if (dma_mapping_error(dev, page_dmaaddr)) {
2613				put_page(pagep);
2614				pagep = NULL;
2615				adapter->drv_stats.dma_map_errors++;
2616				break;
2617			}
2618			page_offset = 0;
2619		} else {
2620			get_page(pagep);
2621			page_offset += rx_frag_size;
2622		}
2623		page_info->page_offset = page_offset;
2624		page_info->page = pagep;
2625
2626		rxd = queue_head_node(rxq);
2627		frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2628		rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2629		rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2630
2631		/* Any space left in the current big page for another frag? */
2632		if ((page_offset + rx_frag_size + rx_frag_size) >
2633					adapter->big_page_size) {
2634			pagep = NULL;
2635			page_info->last_frag = true;
2636			dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2637		} else {
2638			dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2639		}
2640
2641		prev_page_info = page_info;
2642		queue_head_inc(rxq);
2643		page_info = &rxo->page_info_tbl[rxq->head];
2644	}
2645
2646	/* Mark the last frag of a page when we break out of the above loop
2647	 * with no more slots available in the RXQ
2648	 */
2649	if (pagep) {
2650		prev_page_info->last_frag = true;
2651		dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2652	}
2653
2654	if (posted) {
2655		atomic_add(posted, &rxq->used);
2656		if (rxo->rx_post_starved)
2657			rxo->rx_post_starved = false;
2658		do {
2659			notify = min(MAX_NUM_POST_ERX_DB, posted);
2660			be_rxq_notify(adapter, rxq->id, notify);
2661			posted -= notify;
2662		} while (posted);
2663	} else if (atomic_read(&rxq->used) == 0) {
2664		/* Let be_worker replenish when memory is available */
2665		rxo->rx_post_starved = true;
2666	}
2667}
2668
2669static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2670{
2671	switch (status) {
2672	case BE_TX_COMP_HDR_PARSE_ERR:
2673		tx_stats(txo)->tx_hdr_parse_err++;
2674		break;
2675	case BE_TX_COMP_NDMA_ERR:
2676		tx_stats(txo)->tx_dma_err++;
2677		break;
2678	case BE_TX_COMP_ACL_ERR:
2679		tx_stats(txo)->tx_spoof_check_err++;
2680		break;
2681	}
2682}
2683
2684static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2685{
2686	switch (status) {
2687	case LANCER_TX_COMP_LSO_ERR:
2688		tx_stats(txo)->tx_tso_err++;
2689		break;
2690	case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2691	case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2692		tx_stats(txo)->tx_spoof_check_err++;
2693		break;
2694	case LANCER_TX_COMP_QINQ_ERR:
2695		tx_stats(txo)->tx_qinq_err++;
2696		break;
2697	case LANCER_TX_COMP_PARITY_ERR:
2698		tx_stats(txo)->tx_internal_parity_err++;
2699		break;
2700	case LANCER_TX_COMP_DMA_ERR:
2701		tx_stats(txo)->tx_dma_err++;
2702		break;
2703	case LANCER_TX_COMP_SGE_ERR:
2704		tx_stats(txo)->tx_sge_err++;
2705		break;
2706	}
2707}
2708
2709static struct be_tx_compl_info *be_tx_compl_get(struct be_adapter *adapter,
2710						struct be_tx_obj *txo)
2711{
2712	struct be_queue_info *tx_cq = &txo->cq;
2713	struct be_tx_compl_info *txcp = &txo->txcp;
2714	struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2715
2716	if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2717		return NULL;
2718
2719	/* Ensure load ordering of valid bit dword and other dwords below */
2720	rmb();
2721	be_dws_le_to_cpu(compl, sizeof(*compl));
2722
2723	txcp->status = GET_TX_COMPL_BITS(status, compl);
2724	txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2725
2726	if (txcp->status) {
2727		if (lancer_chip(adapter)) {
2728			lancer_update_tx_err(txo, txcp->status);
2729			/* Reset the adapter in case of a TSO,
2730			 * SGE or parity error
2731			 */
2732			if (txcp->status == LANCER_TX_COMP_LSO_ERR ||
2733			    txcp->status == LANCER_TX_COMP_PARITY_ERR ||
2734			    txcp->status == LANCER_TX_COMP_SGE_ERR)
2735				be_set_error(adapter, BE_ERROR_TX);
2736		} else {
2737			be_update_tx_err(txo, txcp->status);
2738		}
2739	}
2740
2741	if (be_check_error(adapter, BE_ERROR_TX))
2742		return NULL;
2743
2744	compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2745	queue_tail_inc(tx_cq);
2746	return txcp;
2747}
2748
2749static u16 be_tx_compl_process(struct be_adapter *adapter,
2750			       struct be_tx_obj *txo, u16 last_index)
2751{
2752	struct sk_buff **sent_skbs = txo->sent_skb_list;
2753	struct be_queue_info *txq = &txo->q;
2754	struct sk_buff *skb = NULL;
2755	bool unmap_skb_hdr = false;
2756	struct be_eth_wrb *wrb;
2757	u16 num_wrbs = 0;
2758	u32 frag_index;
2759
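	/* Walk the TXQ from its tail up to last_index, unmapping the wrbs of
	 * each completed request and freeing the corresponding skbs.
	 */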
2760	do {
2761		if (sent_skbs[txq->tail]) {
2762			/* Free skb from prev req */
2763			if (skb)
2764				dev_consume_skb_any(skb);
2765			skb = sent_skbs[txq->tail];
2766			sent_skbs[txq->tail] = NULL;
2767			queue_tail_inc(txq);  /* skip hdr wrb */
2768			num_wrbs++;
2769			unmap_skb_hdr = true;
2770		}
2771		wrb = queue_tail_node(txq);
2772		frag_index = txq->tail;
2773		unmap_tx_frag(&adapter->pdev->dev, wrb,
2774			      (unmap_skb_hdr && skb_headlen(skb)));
2775		unmap_skb_hdr = false;
2776		queue_tail_inc(txq);
2777		num_wrbs++;
2778	} while (frag_index != last_index);
2779	dev_consume_skb_any(skb);
2780
2781	return num_wrbs;
2782}
2783
2784/* Return the number of events in the event queue */
2785static inline int events_get(struct be_eq_obj *eqo)
2786{
2787	struct be_eq_entry *eqe;
2788	int num = 0;
2789
2790	do {
2791		eqe = queue_tail_node(&eqo->q);
2792		if (eqe->evt == 0)
2793			break;
2794
2795		rmb();
2796		eqe->evt = 0;
2797		num++;
2798		queue_tail_inc(&eqo->q);
2799	} while (true);
2800
2801	return num;
2802}
2803
2804/* Leaves the EQ in disarmed state */
2805static void be_eq_clean(struct be_eq_obj *eqo)
2806{
2807	int num = events_get(eqo);
2808
2809	be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2810}
2811
2812/* Free posted rx buffers that were not used */
2813static void be_rxq_clean(struct be_rx_obj *rxo)
2814{
2815	struct be_queue_info *rxq = &rxo->q;
2816	struct be_rx_page_info *page_info;
2817
2818	while (atomic_read(&rxq->used) > 0) {
2819		page_info = get_rx_page_info(rxo);
2820		put_page(page_info->page);
2821		memset(page_info, 0, sizeof(*page_info));
2822	}
2823	BUG_ON(atomic_read(&rxq->used));
2824	rxq->tail = 0;
2825	rxq->head = 0;
2826}
2827
2828static void be_rx_cq_clean(struct be_rx_obj *rxo)
2829{
2830	struct be_queue_info *rx_cq = &rxo->cq;
2831	struct be_rx_compl_info *rxcp;
2832	struct be_adapter *adapter = rxo->adapter;
2833	int flush_wait = 0;
2834
2835	/* Consume pending rx completions.
2836	 * Wait for the flush completion (identified by zero num_rcvd)
2837	 * to arrive. Notify CQ even when there are no more CQ entries
2838	 * for HW to flush partially coalesced CQ entries.
2839	 * In Lancer, there is no need to wait for flush compl.
2840	 */
2841	for (;;) {
2842		rxcp = be_rx_compl_get(rxo);
2843		if (!rxcp) {
2844			if (lancer_chip(adapter))
2845				break;
2846
2847			if (flush_wait++ > 50 ||
2848			    be_check_error(adapter,
2849					   BE_ERROR_HW)) {
2850				dev_warn(&adapter->pdev->dev,
2851					 "did not receive flush compl\n");
2852				break;
2853			}
2854			be_cq_notify(adapter, rx_cq->id, true, 0);
2855			mdelay(1);
2856		} else {
2857			be_rx_compl_discard(rxo, rxcp);
2858			be_cq_notify(adapter, rx_cq->id, false, 1);
2859			if (rxcp->num_rcvd == 0)
2860				break;
2861		}
2862	}
2863
2864	/* After cleanup, leave the CQ in unarmed state */
2865	be_cq_notify(adapter, rx_cq->id, false, 0);
2866}
2867
2868static void be_tx_compl_clean(struct be_adapter *adapter)
2869{
2870	struct device *dev = &adapter->pdev->dev;
2871	u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2872	struct be_tx_compl_info *txcp;
2873	struct be_queue_info *txq;
2874	u32 end_idx, notified_idx;
2875	struct be_tx_obj *txo;
2876	int i, pending_txqs;
2877
2878	/* Stop polling for compls when HW has been silent for 10ms */
2879	do {
2880		pending_txqs = adapter->num_tx_qs;
2881
2882		for_all_tx_queues(adapter, txo, i) {
2883			cmpl = 0;
2884			num_wrbs = 0;
2885			txq = &txo->q;
2886			while ((txcp = be_tx_compl_get(adapter, txo))) {
2887				num_wrbs +=
2888					be_tx_compl_process(adapter, txo,
2889							    txcp->end_index);
2890				cmpl++;
2891			}
2892			if (cmpl) {
2893				be_cq_notify(adapter, txo->cq.id, false, cmpl);
2894				atomic_sub(num_wrbs, &txq->used);
2895				timeo = 0;
2896			}
2897			if (!be_is_tx_compl_pending(txo))
2898				pending_txqs--;
2899		}
2900
2901		if (pending_txqs == 0 || ++timeo > 10 ||
2902		    be_check_error(adapter, BE_ERROR_HW))
2903			break;
2904
2905		mdelay(1);
2906	} while (true);
2907
2908	/* Free enqueued TX that was never notified to HW */
2909	for_all_tx_queues(adapter, txo, i) {
2910		txq = &txo->q;
2911
2912		if (atomic_read(&txq->used)) {
2913			dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2914				 i, atomic_read(&txq->used));
2915			notified_idx = txq->tail;
2916			end_idx = txq->tail;
2917			index_adv(&end_idx, atomic_read(&txq->used) - 1,
2918				  txq->len);
2919			/* Use the tx-compl process logic to handle requests
2920			 * that were not sent to the HW.
2921			 */
2922			num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2923			atomic_sub(num_wrbs, &txq->used);
2924			BUG_ON(atomic_read(&txq->used));
2925			txo->pend_wrb_cnt = 0;
2926			/* Since hw was never notified of these requests,
2927			 * reset TXQ indices
2928			 */
2929			txq->head = notified_idx;
2930			txq->tail = notified_idx;
2931		}
2932	}
2933}
2934
2935static void be_evt_queues_destroy(struct be_adapter *adapter)
2936{
2937	struct be_eq_obj *eqo;
2938	int i;
2939
2940	for_all_evt_queues(adapter, eqo, i) {
2941		if (eqo->q.created) {
2942			be_eq_clean(eqo);
2943			be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2944			netif_napi_del(&eqo->napi);
2945			free_cpumask_var(eqo->affinity_mask);
2946		}
2947		be_queue_free(adapter, &eqo->q);
2948	}
2949}
2950
2951static int be_evt_queues_create(struct be_adapter *adapter)
2952{
2953	struct be_queue_info *eq;
2954	struct be_eq_obj *eqo;
2955	struct be_aic_obj *aic;
2956	int i, rc;
2957
2958	/* need enough EQs to service both RX and TX queues */
2959	adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2960				    max(adapter->cfg_num_rx_irqs,
2961					adapter->cfg_num_tx_irqs));
2962
2963	adapter->aic_enabled = true;
2964
2965	for_all_evt_queues(adapter, eqo, i) {
2966		int numa_node = dev_to_node(&adapter->pdev->dev);
2967
2968		aic = &adapter->aic_obj[i];
2969		eqo->adapter = adapter;
2970		eqo->idx = i;
2971		aic->max_eqd = BE_MAX_EQD;
2972
2973		eq = &eqo->q;
2974		rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2975				    sizeof(struct be_eq_entry));
2976		if (rc)
2977			return rc;
2978
2979		rc = be_cmd_eq_create(adapter, eqo);
2980		if (rc)
2981			return rc;
2982
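		/* Spread EQ affinity hints across CPUs local to the adapter's
		 * NUMA node; used later as the IRQ affinity hint and for XPS.
		 */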
2983		if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2984			return -ENOMEM;
2985		cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2986				eqo->affinity_mask);
2987		netif_napi_add(adapter->netdev, &eqo->napi, be_poll);
2988	}
2989	return 0;
2990}
2991
2992static void be_mcc_queues_destroy(struct be_adapter *adapter)
2993{
2994	struct be_queue_info *q;
2995
2996	q = &adapter->mcc_obj.q;
2997	if (q->created)
2998		be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2999	be_queue_free(adapter, q);
3000
3001	q = &adapter->mcc_obj.cq;
3002	if (q->created)
3003		be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3004	be_queue_free(adapter, q);
3005}
3006
3007/* Must be called only after TX qs are created as MCC shares TX EQ */
3008static int be_mcc_queues_create(struct be_adapter *adapter)
3009{
3010	struct be_queue_info *q, *cq;
3011
3012	cq = &adapter->mcc_obj.cq;
3013	if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
3014			   sizeof(struct be_mcc_compl)))
3015		goto err;
3016
3017	/* Use the default EQ for MCC completions */
3018	if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
3019		goto mcc_cq_free;
3020
3021	q = &adapter->mcc_obj.q;
3022	if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
3023		goto mcc_cq_destroy;
3024
3025	if (be_cmd_mccq_create(adapter, q, cq))
3026		goto mcc_q_free;
3027
3028	return 0;
3029
3030mcc_q_free:
3031	be_queue_free(adapter, q);
3032mcc_cq_destroy:
3033	be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
3034mcc_cq_free:
3035	be_queue_free(adapter, cq);
3036err:
3037	return -1;
3038}
3039
3040static void be_tx_queues_destroy(struct be_adapter *adapter)
3041{
3042	struct be_queue_info *q;
3043	struct be_tx_obj *txo;
3044	u8 i;
3045
3046	for_all_tx_queues(adapter, txo, i) {
3047		q = &txo->q;
3048		if (q->created)
3049			be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
3050		be_queue_free(adapter, q);
3051
3052		q = &txo->cq;
3053		if (q->created)
3054			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3055		be_queue_free(adapter, q);
3056	}
3057}
3058
3059static int be_tx_qs_create(struct be_adapter *adapter)
3060{
3061	struct be_queue_info *cq;
3062	struct be_tx_obj *txo;
3063	struct be_eq_obj *eqo;
3064	int status, i;
3065
3066	adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
3067
3068	for_all_tx_queues(adapter, txo, i) {
3069		cq = &txo->cq;
3070		status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
3071					sizeof(struct be_eth_tx_compl));
3072		if (status)
3073			return status;
3074
3075		u64_stats_init(&txo->stats.sync);
3076		u64_stats_init(&txo->stats.sync_compl);
3077
3078		/* If num_evt_qs is less than num_tx_qs, then more than
3079		 * one txq shares an eq
3080		 */
3081		eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
3082		status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
3083		if (status)
3084			return status;
3085
3086		status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
3087					sizeof(struct be_eth_wrb));
3088		if (status)
3089			return status;
3090
3091		status = be_cmd_txq_create(adapter, txo);
3092		if (status)
3093			return status;
3094
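		/* Steer TX from the CPUs in this EQ's affinity mask to its TXQ */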
3095		netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
3096				    eqo->idx);
3097	}
3098
3099	dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
3100		 adapter->num_tx_qs);
3101	return 0;
3102}
3103
3104static void be_rx_cqs_destroy(struct be_adapter *adapter)
3105{
3106	struct be_queue_info *q;
3107	struct be_rx_obj *rxo;
3108	int i;
3109
3110	for_all_rx_queues(adapter, rxo, i) {
3111		q = &rxo->cq;
3112		if (q->created)
3113			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3114		be_queue_free(adapter, q);
3115	}
3116}
3117
3118static int be_rx_cqs_create(struct be_adapter *adapter)
3119{
3120	struct be_queue_info *eq, *cq;
3121	struct be_rx_obj *rxo;
3122	int rc, i;
3123
3124	adapter->num_rss_qs =
3125			min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
3126
3127	/* We'll use RSS only if at least 2 RSS rings are supported. */
3128	if (adapter->num_rss_qs < 2)
3129		adapter->num_rss_qs = 0;
3130
3131	adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3132
3133	/* When the interface is not capable of RSS rings (and there is no
3134	 * need to create a default RXQ) we'll still need one RXQ
3135	 */
3136	if (adapter->num_rx_qs == 0)
3137		adapter->num_rx_qs = 1;
3138
3139	adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3140	for_all_rx_queues(adapter, rxo, i) {
3141		rxo->adapter = adapter;
3142		cq = &rxo->cq;
3143		rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3144				    sizeof(struct be_eth_rx_compl));
3145		if (rc)
3146			return rc;
3147
3148		u64_stats_init(&rxo->stats.sync);
3149		eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3150		rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3151		if (rc)
3152			return rc;
3153	}
3154
3155	dev_info(&adapter->pdev->dev,
3156		 "created %d RX queue(s)\n", adapter->num_rx_qs);
3157	return 0;
3158}
3159
3160static irqreturn_t be_intx(int irq, void *dev)
3161{
3162	struct be_eq_obj *eqo = dev;
3163	struct be_adapter *adapter = eqo->adapter;
3164	int num_evts = 0;
3165
3166	/* IRQ is not expected when NAPI is scheduled as the EQ
3167	 * will not be armed.
3168	 * But, this can happen on Lancer INTx where it takes
3169	 * a while to de-assert INTx or in BE2 where occasionally
3170	 * an interrupt may be raised even when EQ is unarmed.
3171	 * If NAPI is already scheduled, then counting & notifying
3172	 * events will orphan them.
3173	 */
3174	if (napi_schedule_prep(&eqo->napi)) {
3175		num_evts = events_get(eqo);
3176		__napi_schedule(&eqo->napi);
3177		if (num_evts)
3178			eqo->spurious_intr = 0;
3179	}
3180	be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3181
3182	/* Return IRQ_HANDLED only for the first spurious intr
3183	 * after a valid intr to stop the kernel from branding
3184	 * this irq as a bad one!
3185	 */
3186	if (num_evts || eqo->spurious_intr++ == 0)
3187		return IRQ_HANDLED;
3188	else
3189		return IRQ_NONE;
3190}
3191
3192static irqreturn_t be_msix(int irq, void *dev)
3193{
3194	struct be_eq_obj *eqo = dev;
3195
3196	be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3197	napi_schedule(&eqo->napi);
3198	return IRQ_HANDLED;
3199}
3200
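/* GRO is attempted only for error-free TCP frames whose L4 checksum passed */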
3201static inline bool do_gro(struct be_rx_compl_info *rxcp)
3202{
3203	return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3204}
3205
3206static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3207			 int budget)
3208{
3209	struct be_adapter *adapter = rxo->adapter;
3210	struct be_queue_info *rx_cq = &rxo->cq;
3211	struct be_rx_compl_info *rxcp;
3212	u32 work_done;
3213	u32 frags_consumed = 0;
3214
3215	for (work_done = 0; work_done < budget; work_done++) {
3216		rxcp = be_rx_compl_get(rxo);
3217		if (!rxcp)
3218			break;
3219
3220		/* Is it a flush compl that has no data */
3221		if (unlikely(rxcp->num_rcvd == 0))
3222			goto loop_continue;
3223
3224		/* Discard compl with partial DMA on Lancer B0 */
3225		if (unlikely(!rxcp->pkt_size)) {
3226			be_rx_compl_discard(rxo, rxcp);
3227			goto loop_continue;
3228		}
3229
3230		/* On BE drop pkts that arrive due to imperfect filtering in
3231		 * promiscuous mode on some SKUs
3232		 */
3233		if (unlikely(rxcp->port != adapter->port_num &&
3234			     !lancer_chip(adapter))) {
3235			be_rx_compl_discard(rxo, rxcp);
3236			goto loop_continue;
3237		}
3238
3239		if (do_gro(rxcp))
3240			be_rx_compl_process_gro(rxo, napi, rxcp);
3241		else
3242			be_rx_compl_process(rxo, napi, rxcp);
3243
3244loop_continue:
3245		frags_consumed += rxcp->num_rcvd;
3246		be_rx_stats_update(rxo, rxcp);
3247	}
3248
3249	if (work_done) {
3250		be_cq_notify(adapter, rx_cq->id, true, work_done);
3251
3252		/* When an rx-obj gets into post_starved state, just
3253		 * let be_worker do the posting.
3254		 */
3255		if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3256		    !rxo->rx_post_starved)
3257			be_post_rx_frags(rxo, GFP_ATOMIC,
3258					 max_t(u32, MAX_RX_POST,
3259					       frags_consumed));
3260	}
3261
3262	return work_done;
3263}
3264
3266static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3267			  int idx)
3268{
3269	int num_wrbs = 0, work_done = 0;
3270	struct be_tx_compl_info *txcp;
3271
3272	while ((txcp = be_tx_compl_get(adapter, txo))) {
3273		num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3274		work_done++;
3275	}
3276
3277	if (work_done) {
3278		be_cq_notify(adapter, txo->cq.id, true, work_done);
3279		atomic_sub(num_wrbs, &txo->q.used);
3280
3281		/* As Tx wrbs have been freed up, wake up netdev queue
3282		 * if it was stopped due to lack of tx wrbs. */
3283		if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3284		    be_can_txq_wake(txo)) {
3285			netif_wake_subqueue(adapter->netdev, idx);
3286		}
3287
3288		u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3289		tx_stats(txo)->tx_compl += work_done;
3290		u64_stats_update_end(&tx_stats(txo)->sync_compl);
3291	}
3292}
3293
3294int be_poll(struct napi_struct *napi, int budget)
3295{
3296	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3297	struct be_adapter *adapter = eqo->adapter;
3298	int max_work = 0, work, i, num_evts;
3299	struct be_rx_obj *rxo;
3300	struct be_tx_obj *txo;
3301	u32 mult_enc = 0;
3302
3303	num_evts = events_get(eqo);
3304
3305	for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3306		be_process_tx(adapter, txo, i);
3307
3308	/* This loop will iterate twice for EQ0 in which
3309	 * completions of the last RXQ (default one) are also processed.
3310	 * For other EQs the loop iterates only once.
3311	 */
3312	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3313		work = be_process_rx(rxo, napi, budget);
3314		max_work = max(work, max_work);
3315	}
3316
3317	if (is_mcc_eqo(eqo))
3318		be_process_mcc(adapter);
3319
3320	if (max_work < budget) {
3321		napi_complete_done(napi, max_work);
3322
3323		/* Skyhawk EQ_DB has a provision to set the rearm-to-interrupt
3324		 * delay via a delay multiplier encoding value
3325		 */
3326		if (skyhawk_chip(adapter))
3327			mult_enc = be_get_eq_delay_mult_enc(eqo);
3328
3329		be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3330			     mult_enc);
3331	} else {
3332		/* As we'll continue in polling mode, count and clear events */
3333		be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3334	}
3335	return max_work;
3336}
3337
3338void be_detect_error(struct be_adapter *adapter)
3339{
3340	u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3341	u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3342	struct device *dev = &adapter->pdev->dev;
3343	u16 val;
3344	u32 i;
3345
3346	if (be_check_error(adapter, BE_ERROR_HW))
3347		return;
3348
3349	if (lancer_chip(adapter)) {
3350		sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3351		if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3352			be_set_error(adapter, BE_ERROR_UE);
3353			sliport_err1 = ioread32(adapter->db +
3354						SLIPORT_ERROR1_OFFSET);
3355			sliport_err2 = ioread32(adapter->db +
3356						SLIPORT_ERROR2_OFFSET);
3357			/* Do not log error messages if it's a FW reset */
3358			if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3359			    sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3360				dev_info(dev, "Reset is in progress\n");
3361			} else {
3362				dev_err(dev, "Error detected in the card\n");
3363				dev_err(dev, "ERR: sliport status 0x%x\n",
3364					sliport_status);
3365				dev_err(dev, "ERR: sliport error1 0x%x\n",
3366					sliport_err1);
3367				dev_err(dev, "ERR: sliport error2 0x%x\n",
3368					sliport_err2);
3369			}
3370		}
3371	} else {
3372		ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3373		ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3374		ue_lo_mask = ioread32(adapter->pcicfg +
3375				      PCICFG_UE_STATUS_LOW_MASK);
3376		ue_hi_mask = ioread32(adapter->pcicfg +
3377				      PCICFG_UE_STATUS_HI_MASK);
3378
3379		ue_lo = (ue_lo & ~ue_lo_mask);
3380		ue_hi = (ue_hi & ~ue_hi_mask);
3381
3382		if (ue_lo || ue_hi) {
3383			/* On certain platforms BE3 hardware can indicate
3384			 * spurious UEs. In case of a UE in the chip,
3385			 * the POST register correctly reports either a
3386			 * FAT_LOG_START state (FW is currently dumping
3387			 * FAT log data) or an ARMFW_UE state. Check for the
3388			 * above states to ascertain if the UE is valid or not.
3389			 */
3390			if (BE3_chip(adapter)) {
3391				val = be_POST_stage_get(adapter);
3392				if ((val & POST_STAGE_FAT_LOG_START)
3393				     != POST_STAGE_FAT_LOG_START &&
3394				    (val & POST_STAGE_ARMFW_UE)
3395				     != POST_STAGE_ARMFW_UE &&
3396				    (val & POST_STAGE_RECOVERABLE_ERR)
3397				     != POST_STAGE_RECOVERABLE_ERR)
3398					return;
3399			}
3400
3401			dev_err(dev, "Error detected in the adapter\n");
3402			be_set_error(adapter, BE_ERROR_UE);
3403
3404			for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3405				if (ue_lo & 1)
3406					dev_err(dev, "UE: %s bit set\n",
3407						ue_status_low_desc[i]);
3408			}
3409			for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3410				if (ue_hi & 1)
3411					dev_err(dev, "UE: %s bit set\n",
3412						ue_status_hi_desc[i]);
3413			}
3414		}
3415	}
3416}
3417
3418static void be_msix_disable(struct be_adapter *adapter)
3419{
3420	if (msix_enabled(adapter)) {
3421		pci_disable_msix(adapter->pdev);
3422		adapter->num_msix_vec = 0;
3423		adapter->num_msix_roce_vec = 0;
3424	}
3425}
3426
3427static int be_msix_enable(struct be_adapter *adapter)
3428{
3429	unsigned int i, max_roce_eqs;
3430	struct device *dev = &adapter->pdev->dev;
3431	int num_vec;
3432
3433	/* If RoCE is supported, program the max number of vectors that
3434	 * could be used for NIC and RoCE, else, just program the number
3435	 * we'll use initially.
3436	 */
3437	if (be_roce_supported(adapter)) {
3438		max_roce_eqs =
3439			be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3440		max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3441		num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3442	} else {
3443		num_vec = max(adapter->cfg_num_rx_irqs,
3444			      adapter->cfg_num_tx_irqs);
3445	}
3446
3447	for (i = 0; i < num_vec; i++)
3448		adapter->msix_entries[i].entry = i;
3449
3450	num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3451					MIN_MSIX_VECTORS, num_vec);
3452	if (num_vec < 0)
3453		goto fail;
3454
3455	if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3456		adapter->num_msix_roce_vec = num_vec / 2;
3457		dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3458			 adapter->num_msix_roce_vec);
3459	}
3460
3461	adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3462
3463	dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3464		 adapter->num_msix_vec);
3465	return 0;
3466
3467fail:
3468	dev_warn(dev, "MSIx enable failed\n");
3469
3470	/* INTx is not supported in VFs, so fail probe if enable_msix fails */
3471	if (be_virtfn(adapter))
3472		return num_vec;
3473	return 0;
3474}
3475
3476static inline int be_msix_vec_get(struct be_adapter *adapter,
3477				  struct be_eq_obj *eqo)
3478{
3479	return adapter->msix_entries[eqo->msix_idx].vector;
3480}
3481
3482static int be_msix_register(struct be_adapter *adapter)
3483{
3484	struct net_device *netdev = adapter->netdev;
3485	struct be_eq_obj *eqo;
3486	int status, i, vec;
3487
3488	for_all_evt_queues(adapter, eqo, i) {
3489		sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3490		vec = be_msix_vec_get(adapter, eqo);
3491		status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3492		if (status)
3493			goto err_msix;
3494
3495		irq_update_affinity_hint(vec, eqo->affinity_mask);
3496	}
3497
3498	return 0;
3499err_msix:
3500	for (i--; i >= 0; i--) {
3501		eqo = &adapter->eq_obj[i];
3502		free_irq(be_msix_vec_get(adapter, eqo), eqo);
3503	}
3504	dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3505		 status);
3506	be_msix_disable(adapter);
3507	return status;
3508}
3509
3510static int be_irq_register(struct be_adapter *adapter)
3511{
3512	struct net_device *netdev = adapter->netdev;
3513	int status;
3514
3515	if (msix_enabled(adapter)) {
3516		status = be_msix_register(adapter);
3517		if (status == 0)
3518			goto done;
3519		/* INTx is not supported for VF */
3520		if (be_virtfn(adapter))
3521			return status;
3522	}
3523
3524	/* INTx: only the first EQ is used */
3525	netdev->irq = adapter->pdev->irq;
3526	status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3527			     &adapter->eq_obj[0]);
3528	if (status) {
3529		dev_err(&adapter->pdev->dev,
3530			"INTx request IRQ failed - err %d\n", status);
3531		return status;
3532	}
3533done:
3534	adapter->isr_registered = true;
3535	return 0;
3536}
3537
3538static void be_irq_unregister(struct be_adapter *adapter)
3539{
3540	struct net_device *netdev = adapter->netdev;
3541	struct be_eq_obj *eqo;
3542	int i, vec;
3543
3544	if (!adapter->isr_registered)
3545		return;
3546
3547	/* INTx */
3548	if (!msix_enabled(adapter)) {
3549		free_irq(netdev->irq, &adapter->eq_obj[0]);
3550		goto done;
3551	}
3552
3553	/* MSIx */
3554	for_all_evt_queues(adapter, eqo, i) {
3555		vec = be_msix_vec_get(adapter, eqo);
3556		irq_update_affinity_hint(vec, NULL);
3557		free_irq(vec, eqo);
3558	}
3559
3560done:
3561	adapter->isr_registered = false;
3562}
3563
3564static void be_rx_qs_destroy(struct be_adapter *adapter)
3565{
3566	struct rss_info *rss = &adapter->rss_info;
3567	struct be_queue_info *q;
3568	struct be_rx_obj *rxo;
3569	int i;
3570
3571	for_all_rx_queues(adapter, rxo, i) {
3572		q = &rxo->q;
3573		if (q->created) {
3574			/* If RXQs are destroyed while in an "out of buffer"
3575			 * state, there is a possibility of an HW stall on
3576			 * Lancer. So, post 64 buffers to each queue to relieve
3577			 * the "out of buffer" condition.
3578			 * Make sure there's space in the RXQ before posting.
3579			 */
3580			if (lancer_chip(adapter)) {
3581				be_rx_cq_clean(rxo);
3582				if (atomic_read(&q->used) == 0)
3583					be_post_rx_frags(rxo, GFP_KERNEL,
3584							 MAX_RX_POST);
3585			}
3586
3587			be_cmd_rxq_destroy(adapter, q);
3588			be_rx_cq_clean(rxo);
3589			be_rxq_clean(rxo);
3590		}
3591		be_queue_free(adapter, q);
3592	}
3593
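	/* Disable RSS in the HW if it had been enabled */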
3594	if (rss->rss_flags) {
3595		rss->rss_flags = RSS_ENABLE_NONE;
3596		be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3597				  128, rss->rss_hkey);
3598	}
3599}
3600
3601static void be_disable_if_filters(struct be_adapter *adapter)
3602{
3603	/* Don't delete MAC on BE3 VFs without FILTMGMT privilege */
3604	if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3605	    check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3606		be_dev_mac_del(adapter, adapter->pmac_id[0]);
3607		eth_zero_addr(adapter->dev_mac);
3608	}
3609
3610	be_clear_uc_list(adapter);
3611	be_clear_mc_list(adapter);
3612
3613	/* The IFACE flags are enabled in the open path and cleared
3614	 * in the close path. When a VF gets detached from the host and
3615	 * assigned to a VM the following happens:
3616	 *	- VF's IFACE flags get cleared in the detach path
3617	 *	- IFACE create is issued by the VF in the attach path
3618	 * Due to a bug in the BE3/Skyhawk-R FW
3619	 * (Lancer FW doesn't have the bug), the IFACE capability flags
3620	 * specified along with the IFACE create cmd issued by a VF are not
3621	 * honoured by FW.  As a consequence, if a *new* driver
3622	 * (that enables/disables IFACE flags in open/close)
3623	 * is loaded in the host and an *old* driver is used by a VM/VF,
3624	 * the IFACE gets created *without* the needed flags.
3625	 * To avoid this, disable RX-filter flags only for Lancer.
3626	 */
3627	if (lancer_chip(adapter)) {
3628		be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3629		adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3630	}
3631}
3632
3633static int be_close(struct net_device *netdev)
3634{
3635	struct be_adapter *adapter = netdev_priv(netdev);
3636	struct be_eq_obj *eqo;
3637	int i;
3638
3639	/* This protection is needed as be_close() may be called even when the
3640	 * adapter is in cleared state (after eeh perm failure)
3641	 */
3642	if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3643		return 0;
3644
3645	/* Before attempting cleanup ensure all the pending cmds in the
3646	 * config_wq have finished execution
3647	 */
3648	flush_workqueue(be_wq);
3649
3650	be_disable_if_filters(adapter);
3651
3652	if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3653		for_all_evt_queues(adapter, eqo, i) {
3654			napi_disable(&eqo->napi);
3655		}
3656		adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3657	}
3658
3659	be_async_mcc_disable(adapter);
3660
3661	/* Wait for all pending tx completions to arrive so that
3662	 * all tx skbs are freed.
3663	 */
3664	netif_tx_disable(netdev);
3665	be_tx_compl_clean(adapter);
3666
3667	be_rx_qs_destroy(adapter);
3668
3669	for_all_evt_queues(adapter, eqo, i) {
3670		if (msix_enabled(adapter))
3671			synchronize_irq(be_msix_vec_get(adapter, eqo));
3672		else
3673			synchronize_irq(netdev->irq);
3674		be_eq_clean(eqo);
3675	}
3676
3677	be_irq_unregister(adapter);
3678
3679	return 0;
3680}
3681
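/* Allocate the RX descriptor rings, create the RXQs in FW (the default RXQ
 * when needed, plus the RSS queues), program the RSS indirection table and
 * hash key when multiple RXQs exist, and post the initial RX buffers.
 */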
3682static int be_rx_qs_create(struct be_adapter *adapter)
3683{
3684	struct rss_info *rss = &adapter->rss_info;
3685	u8 rss_key[RSS_HASH_KEY_LEN];
3686	struct be_rx_obj *rxo;
3687	int rc, i, j;
3688
3689	for_all_rx_queues(adapter, rxo, i) {
3690		rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3691				    sizeof(struct be_eth_rx_d));
3692		if (rc)
3693			return rc;
3694	}
3695
3696	if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3697		rxo = default_rxo(adapter);
3698		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3699				       rx_frag_size, adapter->if_handle,
3700				       false, &rxo->rss_id);
3701		if (rc)
3702			return rc;
3703	}
3704
3705	for_all_rss_queues(adapter, rxo, i) {
3706		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3707				       rx_frag_size, adapter->if_handle,
3708				       true, &rxo->rss_id);
3709		if (rc)
3710			return rc;
3711	}
3712
3713	if (be_multi_rxq(adapter)) {
3714		for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3715			for_all_rss_queues(adapter, rxo, i) {
3716				if ((j + i) >= RSS_INDIR_TABLE_LEN)
3717					break;
3718				rss->rsstable[j + i] = rxo->rss_id;
3719				rss->rss_queue[j + i] = i;
3720			}
3721		}
3722		rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3723			RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3724
3725		if (!BEx_chip(adapter))
3726			rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3727				RSS_ENABLE_UDP_IPV6;
3728
3729		netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3730		rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3731				       RSS_INDIR_TABLE_LEN, rss_key);
3732		if (rc) {
3733			rss->rss_flags = RSS_ENABLE_NONE;
3734			return rc;
3735		}
3736
3737		memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3738	} else {
3739		/* Disable RSS, if only default RX Q is created */
3740		rss->rss_flags = RSS_ENABLE_NONE;
3741	}
3742
3744	/* Post 1 less than RXQ-len to avoid head being equal to tail,
3745	 * which is a queue empty condition
3746	 */
3747	for_all_rx_queues(adapter, rxo, i)
3748		be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3749
3750	return 0;
3751}
3752
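/* Re-apply interface filtering state on open: basic RX-filter flags, the
 * device MAC, any configured VLANs and the RX mode (mc/uc address lists).
 */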
3753static int be_enable_if_filters(struct be_adapter *adapter)
3754{
3755	int status;
3756
3757	status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3758	if (status)
3759		return status;
3760
3761	/* Normally this condition is true as ->dev_mac is zeroed.
3762	 * But on BE3 VFs the initial MAC is pre-programmed by the PF and
3763	 * a subsequent be_dev_mac_add() can fail (after a fresh boot)
3764	 */
3765	if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3766		int old_pmac_id = -1;
3767
3768		/* Remember old programmed MAC if any - can happen on BE3 VF */
3769		if (!is_zero_ether_addr(adapter->dev_mac))
3770			old_pmac_id = adapter->pmac_id[0];
3771
3772		status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3773		if (status)
3774			return status;
3775
3776		/* Delete the old programmed MAC as we successfully programmed
3777		 * a new MAC
3778		 */
3779		if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3780			be_dev_mac_del(adapter, old_pmac_id);
3781
3782		ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3783	}
3784
3785	if (adapter->vlans_added)
3786		be_vid_config(adapter);
3787
3788	__be_set_rx_mode(adapter);
3789
3790	return 0;
3791}
3792
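/* ndo_open handler: create the RX queues, program the interface filters,
 * register IRQs, arm the CQs/EQs, enable NAPI and async MCC processing,
 * then report link state and start the TX queues.
 */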
3793static int be_open(struct net_device *netdev)
3794{
3795	struct be_adapter *adapter = netdev_priv(netdev);
3796	struct be_eq_obj *eqo;
3797	struct be_rx_obj *rxo;
3798	struct be_tx_obj *txo;
3799	u8 link_status;
3800	int status, i;
3801
3802	status = be_rx_qs_create(adapter);
3803	if (status)
3804		goto err;
3805
3806	status = be_enable_if_filters(adapter);
3807	if (status)
3808		goto err;
3809
3810	status = be_irq_register(adapter);
3811	if (status)
3812		goto err;
3813
3814	for_all_rx_queues(adapter, rxo, i)
3815		be_cq_notify(adapter, rxo->cq.id, true, 0);
3816
3817	for_all_tx_queues(adapter, txo, i)
3818		be_cq_notify(adapter, txo->cq.id, true, 0);
3819
3820	be_async_mcc_enable(adapter);
3821
3822	for_all_evt_queues(adapter, eqo, i) {
3823		napi_enable(&eqo->napi);
3824		be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3825	}
3826	adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3827
3828	status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3829	if (!status)
3830		be_link_status_update(adapter, link_status);
3831
3832	netif_tx_start_all_queues(netdev);
3833
3834	udp_tunnel_nic_reset_ntf(netdev);
3835
3836	return 0;
3837err:
3838	be_close(adapter->netdev);
3839	return -EIO;
3840}
3841
3842static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3843{
3844	u32 addr;
3845
3846	addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3847
3848	mac[5] = (u8)(addr & 0xFF);
3849	mac[4] = (u8)((addr >> 8) & 0xFF);
3850	mac[3] = (u8)((addr >> 16) & 0xFF);
3851	/* Use the OUI from the current MAC address */
3852	memcpy(mac, adapter->netdev->dev_addr, 3);
3853}
3854
3855/*
3856 * Generate a seed MAC address from the PF MAC address using jhash.
3857 * MAC addresses for VFs are assigned incrementally, starting from the seed.
3858 * These addresses are programmed in the ASIC by the PF and the VF driver
3859 * queries for the MAC address during its probe.
3860 */
3861static int be_vf_eth_addr_config(struct be_adapter *adapter)
3862{
3863	u32 vf;
3864	int status = 0;
3865	u8 mac[ETH_ALEN];
3866	struct be_vf_cfg *vf_cfg;
3867
3868	be_vf_eth_addr_generate(adapter, mac);
3869
3870	for_all_vfs(adapter, vf_cfg, vf) {
3871		if (BEx_chip(adapter))
3872			status = be_cmd_pmac_add(adapter, mac,
3873						 vf_cfg->if_handle,
3874						 &vf_cfg->pmac_id, vf + 1);
3875		else
3876			status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3877						vf + 1);
3878
3879		if (status)
3880			dev_err(&adapter->pdev->dev,
3881				"Mac address assignment failed for VF %d\n",
3882				vf);
3883		else
3884			memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3885
3886		mac[5] += 1;
3887	}
3888	return status;
3889}
3890
3891static int be_vfs_mac_query(struct be_adapter *adapter)
3892{
3893	int status, vf;
3894	u8 mac[ETH_ALEN];
3895	struct be_vf_cfg *vf_cfg;
3896
3897	for_all_vfs(adapter, vf_cfg, vf) {
3898		status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3899					       mac, vf_cfg->if_handle,
3900					       false, vf+1);
3901		if (status)
3902			return status;
3903		memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3904	}
3905	return 0;
3906}
3907
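/* Undo VF setup: disable SR-IOV (unless VFs are still assigned to VMs),
 * delete each VF's MAC and IFACE in FW and free the per-VF config array.
 */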
3908static void be_vf_clear(struct be_adapter *adapter)
3909{
3910	struct be_vf_cfg *vf_cfg;
3911	u32 vf;
3912
3913	if (pci_vfs_assigned(adapter->pdev)) {
3914		dev_warn(&adapter->pdev->dev,
3915			 "VFs are assigned to VMs: not disabling VFs\n");
3916		goto done;
3917	}
3918
3919	pci_disable_sriov(adapter->pdev);
3920
3921	for_all_vfs(adapter, vf_cfg, vf) {
3922		if (BEx_chip(adapter))
3923			be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3924					vf_cfg->pmac_id, vf + 1);
3925		else
3926			be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3927				       vf + 1);
3928
3929		be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3930	}
3931
3932	if (BE3_chip(adapter))
3933		be_cmd_set_hsw_config(adapter, 0, 0,
3934				      adapter->if_handle,
3935				      PORT_FWD_TYPE_PASSTHRU, 0);
3936done:
3937	kfree(adapter->vf_cfg);
3938	adapter->num_vfs = 0;
3939	adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3940}
3941
3942static void be_clear_queues(struct be_adapter *adapter)
3943{
3944	be_mcc_queues_destroy(adapter);
3945	be_rx_cqs_destroy(adapter);
3946	be_tx_queues_destroy(adapter);
3947	be_evt_queues_destroy(adapter);
3948}
3949
3950static void be_cancel_worker(struct be_adapter *adapter)
3951{
3952	if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3953		cancel_delayed_work_sync(&adapter->work);
3954		adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3955	}
3956}
3957
3958static void be_cancel_err_detection(struct be_adapter *adapter)
3959{
3960	struct be_error_recovery *err_rec = &adapter->error_recovery;
3961
3962	if (!be_err_recovery_workq)
3963		return;
3964
3965	if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3966		cancel_delayed_work_sync(&err_rec->err_detection_work);
3967		adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3968	}
3969}
3970
3971/* VxLAN offload Notes:
3972 *
3973 * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
3974 * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
3975 * is expected to work across all types of IP tunnels once exported. Skyhawk
3976 * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
3977 * offloads in hw_enc_features only when a VxLAN port is added. If other (non-
3978 * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
3979 * those other tunnels are unexported on the fly through ndo_features_check().
3980 */
3981static int be_vxlan_set_port(struct net_device *netdev, unsigned int table,
3982			     unsigned int entry, struct udp_tunnel_info *ti)
3983{
3984	struct be_adapter *adapter = netdev_priv(netdev);
3985	struct device *dev = &adapter->pdev->dev;
3986	int status;
3987
3988	status = be_cmd_manage_iface(adapter, adapter->if_handle,
3989				     OP_CONVERT_NORMAL_TO_TUNNEL);
3990	if (status) {
3991		dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3992		return status;
3993	}
3994	adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3995
3996	status = be_cmd_set_vxlan_port(adapter, ti->port);
3997	if (status) {
3998		dev_warn(dev, "Failed to add VxLAN port\n");
3999		return status;
4000	}
4001	adapter->vxlan_port = ti->port;
4002
4003	netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4004				   NETIF_F_TSO | NETIF_F_TSO6 |
4005				   NETIF_F_GSO_UDP_TUNNEL;
4006
4007	dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4008		 be16_to_cpu(ti->port));
4009	return 0;
4010}
4011
4012static int be_vxlan_unset_port(struct net_device *netdev, unsigned int table,
4013			       unsigned int entry, struct udp_tunnel_info *ti)
4014{
4015	struct be_adapter *adapter = netdev_priv(netdev);
4016
4017	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
4018		be_cmd_manage_iface(adapter, adapter->if_handle,
4019				    OP_CONVERT_TUNNEL_TO_NORMAL);
4020
4021	if (adapter->vxlan_port)
4022		be_cmd_set_vxlan_port(adapter, 0);
4023
4024	adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
4025	adapter->vxlan_port = 0;
4026
4027	netdev->hw_enc_features = 0;
4028	return 0;
4029}
4030
4031static const struct udp_tunnel_nic_info be_udp_tunnels = {
4032	.set_port	= be_vxlan_set_port,
4033	.unset_port	= be_vxlan_unset_port,
4034	.flags		= UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
4035			  UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
4036	.tables		= {
4037		{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
4038	},
4039};
4040
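/* Compute the per-VF share of queue, MAC, VLAN, IFACE and MCCQ resources
 * that is handed to FW when SR-IOV resources are (re)distributed.
 */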
4041static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4042				struct be_resources *vft_res)
4043{
4044	struct be_resources res = adapter->pool_res;
4045	u32 vf_if_cap_flags = res.vf_if_cap_flags;
4046	struct be_resources res_mod = {0};
4047	u16 num_vf_qs = 1;
4048
4049	/* Distribute the queue resources among the PF and its VFs */
4050	if (num_vfs) {
4051		/* Divide the rx queues evenly among the VFs and the PF, capped
4052		 * at VF-EQ-count. Any remainder queues belong to the PF.
4053		 */
4054		num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4055				res.max_rss_qs / (num_vfs + 1));
4056
4057		/* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4058		 * RSS Tables per port. Provide RSS on VFs only if the number of
4059		 * VFs requested is less than its PF pool's RSS Tables limit.
4060		 */
4061		if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4062			num_vf_qs = 1;
4063	}
4064
4065	/* GET_PROFILE_CONFIG returns a resource struct with all '1's in the
4066	 * fields that are modifiable using the SET_PROFILE_CONFIG cmd.
4067	 */
4068	be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4069				  RESOURCE_MODIFIABLE, 0);
4070
4071	/* If RSS IFACE capability flags are modifiable for a VF, set the
4072	 * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4073	 * more than 1 RSSQ is available for a VF.
4074	 * Otherwise, provision only 1 queue pair for VF.
4075	 */
4076	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4077		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4078		if (num_vf_qs > 1) {
4079			vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4080			if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4081				vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4082		} else {
4083			vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4084					     BE_IF_FLAGS_DEFQ_RSS);
4085		}
4086	} else {
4087		num_vf_qs = 1;
4088	}
4089
4090	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4091		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4092		vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4093	}
4094
4095	vft_res->vf_if_cap_flags = vf_if_cap_flags;
4096	vft_res->max_rx_qs = num_vf_qs;
4097	vft_res->max_rss_qs = num_vf_qs;
4098	vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4099	vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4100
4101	/* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4102	 * among the PF and its VFs, if the fields are changeable
4103	 */
4104	if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4105		vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4106
4107	if (res_mod.max_vlans == FIELD_MODIFIABLE)
4108		vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4109
4110	if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4111		vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4112
4113	if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4114		vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4115}
4116
4117static void be_if_destroy(struct be_adapter *adapter)
4118{
4119	be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4120
4121	kfree(adapter->pmac_id);
4122	adapter->pmac_id = NULL;
4123
4124	kfree(adapter->mc_list);
4125	adapter->mc_list = NULL;
4126
4127	kfree(adapter->uc_list);
4128	adapter->uc_list = NULL;
4129}
4130
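/* Undo be_setup(): stop the worker, clear VFs, return the PF-pool resources
 * to FW on Skyhawk, then destroy the IFACE and all queues and disable MSI-X.
 */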
4131static int be_clear(struct be_adapter *adapter)
4132{
4133	struct pci_dev *pdev = adapter->pdev;
4134	struct  be_resources vft_res = {0};
4135
4136	be_cancel_worker(adapter);
4137
4138	flush_workqueue(be_wq);
4139
4140	if (sriov_enabled(adapter))
4141		be_vf_clear(adapter);
4142
4143	/* Re-configure FW to distribute resources evenly across max-supported
4144	 * number of VFs, only when VFs are not already enabled.
4145	 */
4146	if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4147	    !pci_vfs_assigned(pdev)) {
4148		be_calculate_vf_res(adapter,
4149				    pci_sriov_get_totalvfs(pdev),
4150				    &vft_res);
4151		be_cmd_set_sriov_config(adapter, adapter->pool_res,
4152					pci_sriov_get_totalvfs(pdev),
4153					&vft_res);
4154	}
4155
4156	be_vxlan_unset_port(adapter->netdev, 0, 0, NULL);
4157
4158	be_if_destroy(adapter);
4159
4160	be_clear_queues(adapter);
4161
4162	be_msix_disable(adapter);
4163	adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4164	return 0;
4165}
4166
4167static int be_vfs_if_create(struct be_adapter *adapter)
4168{
4169	struct be_resources res = {0};
4170	u32 cap_flags, en_flags, vf;
4171	struct be_vf_cfg *vf_cfg;
4172	int status;
4173
4174	/* If a FW profile exists, then cap_flags are updated */
4175	cap_flags = BE_VF_IF_EN_FLAGS;
4176
4177	for_all_vfs(adapter, vf_cfg, vf) {
4178		if (!BE3_chip(adapter)) {
4179			status = be_cmd_get_profile_config(adapter, &res, NULL,
4180							   ACTIVE_PROFILE_TYPE,
4181							   RESOURCE_LIMITS,
4182							   vf + 1);
4183			if (!status) {
4184				cap_flags = res.if_cap_flags;
4185				/* Prevent VFs from enabling VLAN promiscuous
4186				 * mode
4187				 */
4188				cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4189			}
4190		}
4191
4192		/* PF should enable IF flags during proxy if_create call */
4193		en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4194		status = be_cmd_if_create(adapter, cap_flags, en_flags,
4195					  &vf_cfg->if_handle, vf + 1);
4196		if (status)
4197			return status;
4198	}
4199
4200	return 0;
4201}
4202
4203static int be_vf_setup_init(struct be_adapter *adapter)
4204{
4205	struct be_vf_cfg *vf_cfg;
4206	int vf;
4207
4208	adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4209				  GFP_KERNEL);
4210	if (!adapter->vf_cfg)
4211		return -ENOMEM;
4212
4213	for_all_vfs(adapter, vf_cfg, vf) {
4214		vf_cfg->if_handle = -1;
4215		vf_cfg->pmac_id = -1;
4216	}
4217	return 0;
4218}
4219
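/* Bring up SR-IOV: create (or re-discover) each VF's IFACE and MAC, grant
 * the FILTMGMT privilege where possible, apply QoS/link-state/spoofchk
 * defaults and enable the VFs in the PCI layer if not already enabled.
 */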
4220static int be_vf_setup(struct be_adapter *adapter)
4221{
4222	struct device *dev = &adapter->pdev->dev;
4223	struct be_vf_cfg *vf_cfg;
4224	int status, old_vfs, vf;
4225	bool spoofchk;
4226
4227	old_vfs = pci_num_vf(adapter->pdev);
4228
4229	status = be_vf_setup_init(adapter);
4230	if (status)
4231		goto err;
4232
4233	if (old_vfs) {
4234		for_all_vfs(adapter, vf_cfg, vf) {
4235			status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4236			if (status)
4237				goto err;
4238		}
4239
4240		status = be_vfs_mac_query(adapter);
4241		if (status)
4242			goto err;
4243	} else {
4244		status = be_vfs_if_create(adapter);
4245		if (status)
4246			goto err;
4247
4248		status = be_vf_eth_addr_config(adapter);
4249		if (status)
4250			goto err;
4251	}
4252
4253	for_all_vfs(adapter, vf_cfg, vf) {
4254		/* Allow VFs to program MAC/VLAN filters */
4255		status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4256						  vf + 1);
4257		if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4258			status = be_cmd_set_fn_privileges(adapter,
4259							  vf_cfg->privileges |
4260							  BE_PRIV_FILTMGMT,
4261							  vf + 1);
4262			if (!status) {
4263				vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4264				dev_info(dev, "VF%d has FILTMGMT privilege\n",
4265					 vf);
4266			}
4267		}
4268
4269		/* Allow full available bandwidth */
4270		if (!old_vfs)
4271			be_cmd_config_qos(adapter, 0, 0, vf + 1);
4272
4273		status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4274					       vf_cfg->if_handle, NULL,
4275					       &spoofchk);
4276		if (!status)
4277			vf_cfg->spoofchk = spoofchk;
4278
4279		if (!old_vfs) {
4280			be_cmd_enable_vf(adapter, vf + 1);
4281			be_cmd_set_logical_link_config(adapter,
4282						       IFLA_VF_LINK_STATE_AUTO,
4283						       vf+1);
4284		}
4285	}
4286
4287	if (!old_vfs) {
4288		status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4289		if (status) {
4290			dev_err(dev, "SRIOV enable failed\n");
4291			adapter->num_vfs = 0;
4292			goto err;
4293		}
4294	}
4295
4296	if (BE3_chip(adapter)) {
4297		/* On BE3, enable VEB only when SRIOV is enabled */
4298		status = be_cmd_set_hsw_config(adapter, 0, 0,
4299					       adapter->if_handle,
4300					       PORT_FWD_TYPE_VEB, 0);
4301		if (status)
4302			goto err;
4303	}
4304
4305	adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4306	return 0;
4307err:
4308	dev_err(dev, "VF setup failed\n");
4309	be_vf_clear(adapter);
4310	return status;
4311}
4312
4313/* Convert function_mode bits on BE3 to SH mc_type enums */
4315static u8 be_convert_mc_type(u32 function_mode)
4316{
4317	if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4318		return vNIC1;
4319	else if (function_mode & QNQ_MODE)
4320		return FLEX10;
4321	else if (function_mode & VNIC_MODE)
4322		return vNIC2;
4323	else if (function_mode & UMC_ENABLED)
4324		return UMC;
4325	else
4326		return MC_NONE;
4327}
4328
4329/* On BE2/BE3 FW does not suggest the supported limits */
4330static void BEx_get_resources(struct be_adapter *adapter,
4331			      struct be_resources *res)
4332{
4333	bool use_sriov = adapter->num_vfs ? 1 : 0;
4334
4335	if (be_physfn(adapter))
4336		res->max_uc_mac = BE_UC_PMAC_COUNT;
4337	else
4338		res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4339
4340	adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4341
4342	if (be_is_mc(adapter)) {
4343		/* Assume that there are 4 channels per port
4344		 * when multi-channel is enabled
4345		 */
4346		if (be_is_qnq_mode(adapter))
4347			res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4348		else
4349			/* In a non-qnq multichannel mode, the pvid
4350			 * takes up one vlan entry
4351			 */
4352			res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4353	} else {
4354		res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4355	}
4356
4357	res->max_mcast_mac = BE_MAX_MC;
4358
4359	/* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4360	 * 2) Create multiple TX rings on a BE3-R multi-channel interface
4361	 *    *only* if it is RSS-capable.
4362	 */
4363	if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4364	    be_virtfn(adapter) ||
4365	    (be_is_mc(adapter) &&
4366	     !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4367		res->max_tx_qs = 1;
4368	} else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4369		struct be_resources super_nic_res = {0};
4370
4371		/* On a SuperNIC profile, the driver needs to use the
4372		 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4373		 */
4374		be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4375					  ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4376					  0);
4377		/* Some old versions of BE3 FW don't report max_tx_qs value */
4378		res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4379	} else {
4380		res->max_tx_qs = BE3_MAX_TX_QS;
4381	}
4382
4383	if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4384	    !use_sriov && be_physfn(adapter))
4385		res->max_rss_qs = (adapter->be3_native) ?
4386					   BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4387	res->max_rx_qs = res->max_rss_qs + 1;
4388
4389	if (be_physfn(adapter))
4390		res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4391					BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4392	else
4393		res->max_evt_qs = 1;
4394
4395	res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4396	res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4397	if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4398		res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4399}
4400
4401static void be_setup_init(struct be_adapter *adapter)
4402{
4403	adapter->vlan_prio_bmap = 0xff;
4404	adapter->phy.link_speed = -1;
4405	adapter->if_handle = -1;
4406	adapter->be3_native = false;
4407	adapter->if_flags = 0;
4408	adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4409	if (be_physfn(adapter))
4410		adapter->cmd_privileges = MAX_PRIVILEGES;
4411	else
4412		adapter->cmd_privileges = MIN_PRIVILEGES;
4413}
4414
4415/* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4416 * However, this HW limitation is not exposed to the host via any SLI cmd.
4417 * As a result, in the case of SRIOV and in particular multi-partition configs
4418 * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4419 * for distribution between the VFs. This self-imposed limit will determine the
4420 * number of VFs for which RSS can be enabled.
4421 */
4422static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4423{
4424	struct be_port_resources port_res = {0};
4425	u8 rss_tables_on_port;
4426	u16 max_vfs = be_max_vfs(adapter);
4427
4428	be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4429				  RESOURCE_LIMITS, 0);
4430
4431	rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4432
4433	/* Each PF Pool's RSS Tables limit =
4434	 * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4435	 */
4436	adapter->pool_res.max_rss_tables =
4437		max_vfs * rss_tables_on_port / port_res.max_vfs;
4438}
4439
4440static int be_get_sriov_config(struct be_adapter *adapter)
4441{
4442	struct be_resources res = {0};
4443	int max_vfs, old_vfs;
4444
4445	be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4446				  RESOURCE_LIMITS, 0);
4447
4448	/* Some old versions of BE3 FW don't report max_vfs value */
4449	if (BE3_chip(adapter) && !res.max_vfs) {
4450		max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4451		res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4452	}
4453
4454	adapter->pool_res = res;
4455
4456	/* If the VFs were not disabled during a previous unload of the driver,
4457	 * then we cannot rely on the PF POOL limits for the TotalVFs value.
4458	 * Instead use the TotalVFs value stored in the pci-dev struct.
4459	 */
4460	old_vfs = pci_num_vf(adapter->pdev);
4461	if (old_vfs) {
4462		dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4463			 old_vfs);
4464
4465		adapter->pool_res.max_vfs =
4466			pci_sriov_get_totalvfs(adapter->pdev);
4467		adapter->num_vfs = old_vfs;
4468	}
4469
4470	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4471		be_calculate_pf_pool_rss_tables(adapter);
4472		dev_info(&adapter->pdev->dev,
4473			 "RSS can be enabled for all VFs if num_vfs <= %d\n",
4474			 be_max_pf_pool_rss_tables(adapter));
4475	}
4476	return 0;
4477}
4478
4479static void be_alloc_sriov_res(struct be_adapter *adapter)
4480{
4481	int old_vfs = pci_num_vf(adapter->pdev);
4482	struct  be_resources vft_res = {0};
4483	int status;
4484
4485	be_get_sriov_config(adapter);
4486
4487	if (!old_vfs)
4488		pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4489
4490	/* When the HW is in an SRIOV-capable configuration, the PF-pool
4491	 * resources are given to the PF during driver load, if there are no
4492	 * old VFs. This facility is not available in BE3 FW.
4493	 * On Lancer, this is done by the FW itself.
4494	 */
4495	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4496		be_calculate_vf_res(adapter, 0, &vft_res);
4497		status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4498						 &vft_res);
4499		if (status)
4500			dev_err(&adapter->pdev->dev,
4501				"Failed to optimize SRIOV resources\n");
4502	}
4503}
4504
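/* Query (or derive, on BE2/BE3) the per-function resource limits and use
 * them to size this function's RX/TX queue and IRQ configuration.
 */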
4505static int be_get_resources(struct be_adapter *adapter)
4506{
4507	struct device *dev = &adapter->pdev->dev;
4508	struct be_resources res = {0};
4509	int status;
4510
4511	/* For Lancer, SH etc., read per-function resource limits from FW.
4512	 * GET_FUNC_CONFIG returns per-function guaranteed limits.
4513	 * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4514	 */
4515	if (BEx_chip(adapter)) {
4516		BEx_get_resources(adapter, &res);
4517	} else {
4518		status = be_cmd_get_func_config(adapter, &res);
4519		if (status)
4520			return status;
4521
4522		/* If a default RXQ must be created, we'll use up one RSSQ */
4523		if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4524		    !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4525			res.max_rss_qs -= 1;
4526	}
4527
4528	/* If RoCE is supported, stash away half the EQs for RoCE */
4529	res.max_nic_evt_qs = be_roce_supported(adapter) ?
4530				res.max_evt_qs / 2 : res.max_evt_qs;
4531	adapter->res = res;
4532
4533	/* If FW supports RSS default queue, then skip creating non-RSS
4534	 * queue for non-IP traffic.
4535	 */
4536	adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4537				 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4538
4539	dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4540		 be_max_txqs(adapter), be_max_rxqs(adapter),
4541		 be_max_rss(adapter), be_max_nic_eqs(adapter),
4542		 be_max_vfs(adapter));
4543	dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4544		 be_max_uc(adapter), be_max_mc(adapter),
4545		 be_max_vlans(adapter));
4546
4547	/* Ensure RX and TX queues are created in pairs at init time */
4548	adapter->cfg_num_rx_irqs =
4549				min_t(u16, netif_get_num_default_rss_queues(),
4550				      be_max_qp_irqs(adapter));
4551	adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4552	return 0;
4553}
4554
4555static int be_get_config(struct be_adapter *adapter)
4556{
4557	int status, level;
4558	u16 profile_id;
4559
4560	status = be_cmd_get_cntl_attributes(adapter);
4561	if (status)
4562		return status;
4563
4564	status = be_cmd_query_fw_cfg(adapter);
4565	if (status)
4566		return status;
4567
4568	if (!lancer_chip(adapter) && be_physfn(adapter))
4569		be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4570
4571	if (BEx_chip(adapter)) {
4572		level = be_cmd_get_fw_log_level(adapter);
4573		adapter->msg_enable =
4574			level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4575	}
4576
4577	be_cmd_get_acpi_wol_cap(adapter);
4578	pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4579	pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4580
4581	be_cmd_query_port_name(adapter);
4582
4583	if (be_physfn(adapter)) {
4584		status = be_cmd_get_active_profile(adapter, &profile_id);
4585		if (!status)
4586			dev_info(&adapter->pdev->dev,
4587				 "Using profile 0x%x\n", profile_id);
4588	}
4589
4590	return 0;
4591}
4592
4593static int be_mac_setup(struct be_adapter *adapter)
4594{
4595	u8 mac[ETH_ALEN];
4596	int status;
4597
4598	if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4599		status = be_cmd_get_perm_mac(adapter, mac);
4600		if (status)
4601			return status;
4602
4603		eth_hw_addr_set(adapter->netdev, mac);
4604		memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4605
4606		/* Initial MAC for BE3 VFs is already programmed by PF */
4607		if (BEx_chip(adapter) && be_virtfn(adapter))
4608			memcpy(adapter->dev_mac, mac, ETH_ALEN);
4609	}
4610
4611	return 0;
4612}
4613
4614static void be_schedule_worker(struct be_adapter *adapter)
4615{
4616	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4617	adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4618}
4619
4620static void be_destroy_err_recovery_workq(void)
4621{
4622	if (!be_err_recovery_workq)
4623		return;
4624
4625	destroy_workqueue(be_err_recovery_workq);
4626	be_err_recovery_workq = NULL;
4627}
4628
4629static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4630{
4631	struct be_error_recovery *err_rec = &adapter->error_recovery;
4632
4633	if (!be_err_recovery_workq)
4634		return;
4635
4636	queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4637			   msecs_to_jiffies(delay));
4638	adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4639}
4640
4641static int be_setup_queues(struct be_adapter *adapter)
4642{
4643	struct net_device *netdev = adapter->netdev;
4644	int status;
4645
4646	status = be_evt_queues_create(adapter);
4647	if (status)
4648		goto err;
4649
4650	status = be_tx_qs_create(adapter);
4651	if (status)
4652		goto err;
4653
4654	status = be_rx_cqs_create(adapter);
4655	if (status)
4656		goto err;
4657
4658	status = be_mcc_queues_create(adapter);
4659	if (status)
4660		goto err;
4661
4662	status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4663	if (status)
4664		goto err;
4665
4666	status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4667	if (status)
4668		goto err;
4669
4670	return 0;
4671err:
4672	dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4673	return status;
4674}
4675
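/* Allocate the MAC/multicast/unicast filter tables and create this
 * function's IFACE in FW with RSS flags matching the configured RX IRQs.
 */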
4676static int be_if_create(struct be_adapter *adapter)
4677{
4678	u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4679	u32 cap_flags = be_if_cap_flags(adapter);
4680
4681	/* alloc required memory for other filtering fields */
4682	adapter->pmac_id = kcalloc(be_max_uc(adapter),
4683				   sizeof(*adapter->pmac_id), GFP_KERNEL);
4684	if (!adapter->pmac_id)
4685		return -ENOMEM;
4686
4687	adapter->mc_list = kcalloc(be_max_mc(adapter),
4688				   sizeof(*adapter->mc_list), GFP_KERNEL);
4689	if (!adapter->mc_list)
4690		return -ENOMEM;
4691
4692	adapter->uc_list = kcalloc(be_max_uc(adapter),
4693				   sizeof(*adapter->uc_list), GFP_KERNEL);
4694	if (!adapter->uc_list)
4695		return -ENOMEM;
4696
4697	if (adapter->cfg_num_rx_irqs == 1)
4698		cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4699
4700	en_flags &= cap_flags;
4701	/* will enable all the needed filter flags in be_open() */
4702	return be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4703				  &adapter->if_handle, 0);
4704}
4705
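/* Re-create the IFACE and the whole queue set with updated counts; used
 * when the function's queue configuration changes.
 */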
4706int be_update_queues(struct be_adapter *adapter)
4707{
4708	struct net_device *netdev = adapter->netdev;
4709	int status;
4710
4711	if (netif_running(netdev)) {
4712		/* be_tx_timeout() must not run concurrently with this
4713		 * function, synchronize with an already-running dev_watchdog
4714		 */
4715		netif_tx_lock_bh(netdev);
4716		/* device cannot transmit now, avoid dev_watchdog timeouts */
4717		netif_carrier_off(netdev);
4718		netif_tx_unlock_bh(netdev);
4719
4720		be_close(netdev);
4721	}
4722
4723	be_cancel_worker(adapter);
4724
4725	/* If any vectors have been shared with RoCE, we cannot re-program
4726	 * the MSI-X table.
4727	 */
4728	if (!adapter->num_msix_roce_vec)
4729		be_msix_disable(adapter);
4730
4731	be_clear_queues(adapter);
4732	status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4733	if (status)
4734		return status;
4735
4736	if (!msix_enabled(adapter)) {
4737		status = be_msix_enable(adapter);
4738		if (status)
4739			return status;
4740	}
4741
4742	status = be_if_create(adapter);
4743	if (status)
4744		return status;
4745
4746	status = be_setup_queues(adapter);
4747	if (status)
4748		return status;
4749
4750	be_schedule_worker(adapter);
4751
4752	/* The IF was destroyed and re-created. We need to clear
4753	 * all promiscuous flags valid for the destroyed IF.
4754	 * Without this promisc mode is not restored during
4755	 * be_open() because the driver thinks that it is
4756	 * already enabled in HW.
4757	 */
4758	adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4759
4760	if (netif_running(netdev))
4761		status = be_open(netdev);
4762
4763	return status;
4764}
4765
4766static inline int fw_major_num(const char *fw_ver)
4767{
4768	int fw_major = 0, i;
4769
4770	i = sscanf(fw_ver, "%d.", &fw_major);
4771	if (i != 1)
4772		return 0;
4773
4774	return fw_major;
4775}
4776
4777/* If it is error recovery, FLR the PF.
4778 * Else, if any VFs are already enabled, don't FLR the PF.
4779 */
4780static bool be_reset_required(struct be_adapter *adapter)
4781{
4782	if (be_error_recovering(adapter))
4783		return true;
4784	else
4785		return pci_num_vf(adapter->pdev) == 0;
4786}
4787
4788/* Wait for the FW to be ready and perform the required initialization */
4789static int be_func_init(struct be_adapter *adapter)
4790{
4791	int status;
4792
4793	status = be_fw_wait_ready(adapter);
4794	if (status)
4795		return status;
4796
4797	/* FW is now ready; clear errors to allow cmds/doorbell */
4798	be_clear_error(adapter, BE_CLEAR_ALL);
4799
4800	if (be_reset_required(adapter)) {
4801		status = be_cmd_reset_function(adapter);
4802		if (status)
4803			return status;
4804
4805		/* Wait for interrupts to quiesce after an FLR */
4806		msleep(100);
4807	}
4808
4809	/* Tell FW we're ready to fire cmds */
4810	status = be_cmd_fw_init(adapter);
4811	if (status)
4812		return status;
4813
4814	/* Allow interrupts for other ULPs running on NIC function */
4815	be_intr_set(adapter, true);
4816
4817	return 0;
4818}
4819
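/* One-time function-level setup: wait for FW readiness, query configuration
 * and resource limits, enable MSI-X, create the IFACE and queues, program
 * the MAC address and flow control, and start the housekeeping worker.
 */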
4820static int be_setup(struct be_adapter *adapter)
4821{
4822	struct device *dev = &adapter->pdev->dev;
4823	int status;
4824
4825	status = be_func_init(adapter);
4826	if (status)
4827		return status;
4828
4829	be_setup_init(adapter);
4830
4831	if (!lancer_chip(adapter))
4832		be_cmd_req_native_mode(adapter);
4833
4834	/* Invoke this cmd first to get pf_num and vf_num, which are needed
4835	 * for issuing profile-related cmds
4836	 */
4837	if (!BEx_chip(adapter)) {
4838		status = be_cmd_get_func_config(adapter, NULL);
4839		if (status)
4840			return status;
4841	}
4842
4843	status = be_get_config(adapter);
4844	if (status)
4845		goto err;
4846
4847	if (!BE2_chip(adapter) && be_physfn(adapter))
4848		be_alloc_sriov_res(adapter);
4849
4850	status = be_get_resources(adapter);
4851	if (status)
4852		goto err;
4853
4854	status = be_msix_enable(adapter);
4855	if (status)
4856		goto err;
4857
4858	/* will enable all the needed filter flags in be_open() */
4859	status = be_if_create(adapter);
4860	if (status)
4861		goto err;
4862
4863	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4864	rtnl_lock();
4865	status = be_setup_queues(adapter);
4866	rtnl_unlock();
4867	if (status)
4868		goto err;
4869
4870	be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4871
4872	status = be_mac_setup(adapter);
4873	if (status)
4874		goto err;
4875
4876	be_cmd_get_fw_ver(adapter);
4877	dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4878
4879	if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4880		dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4881			adapter->fw_ver);
4882		dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4883	}
4884
4885	status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4886					 adapter->rx_fc);
4887	if (status)
4888		be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4889					&adapter->rx_fc);
4890
4891	dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4892		 adapter->tx_fc, adapter->rx_fc);
4893
4894	if (be_physfn(adapter))
4895		be_cmd_set_logical_link_config(adapter,
4896					       IFLA_VF_LINK_STATE_AUTO, 0);
4897
4898	/* BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4899	 * confusing any Linux bridge or OVS that it might be connected to.
4900	 * Set the EVB to PASSTHRU mode, which effectively disables the EVB,
4901	 * when SRIOV is not enabled.
4902	 */
4903	if (BE3_chip(adapter))
4904		be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4905				      PORT_FWD_TYPE_PASSTHRU, 0);
4906
4907	if (adapter->num_vfs)
4908		be_vf_setup(adapter);
4909
4910	status = be_cmd_get_phy_info(adapter);
4911	if (!status && be_pause_supported(adapter))
4912		adapter->phy.fc_autoneg = 1;
4913
4914	if (be_physfn(adapter) && !lancer_chip(adapter))
4915		be_cmd_set_features(adapter);
4916
4917	be_schedule_worker(adapter);
4918	adapter->flags |= BE_FLAGS_SETUP_DONE;
4919	return 0;
4920err:
4921	be_clear(adapter);
4922	return status;
4923}
4924
4925#ifdef CONFIG_NET_POLL_CONTROLLER
4926static void be_netpoll(struct net_device *netdev)
4927{
4928	struct be_adapter *adapter = netdev_priv(netdev);
4929	struct be_eq_obj *eqo;
4930	int i;
4931
4932	for_all_evt_queues(adapter, eqo, i) {
4933		be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4934		napi_schedule(&eqo->napi);
4935	}
4936}
4937#endif
4938
4939int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4940{
4941	const struct firmware *fw;
4942	int status;
4943
4944	if (!netif_running(adapter->netdev)) {
4945		dev_err(&adapter->pdev->dev,
4946			"Firmware load not allowed (interface is down)\n");
4947		return -ENETDOWN;
4948	}
4949
4950	status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4951	if (status)
4952		goto fw_exit;
4953
4954	dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4955
4956	if (lancer_chip(adapter))
4957		status = lancer_fw_download(adapter, fw);
4958	else
4959		status = be_fw_download(adapter, fw);
4960
4961	if (!status)
4962		be_cmd_get_fw_ver(adapter);
4963
4964fw_exit:
4965	release_firmware(fw);
4966	return status;
4967}
4968
4969static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4970				 u16 flags, struct netlink_ext_ack *extack)
4971{
4972	struct be_adapter *adapter = netdev_priv(dev);
4973	struct nlattr *attr, *br_spec;
4974	int rem;
4975	int status = 0;
4976	u16 mode = 0;
4977
4978	if (!sriov_enabled(adapter))
4979		return -EOPNOTSUPP;
4980
4981	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4982	if (!br_spec)
4983		return -EINVAL;
4984
4985	nla_for_each_nested(attr, br_spec, rem) {
4986		if (nla_type(attr) != IFLA_BRIDGE_MODE)
4987			continue;
4988
4989		mode = nla_get_u16(attr);
4990		if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4991			return -EOPNOTSUPP;
4992
4993		if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4994			return -EINVAL;
4995
4996		status = be_cmd_set_hsw_config(adapter, 0, 0,
4997					       adapter->if_handle,
4998					       mode == BRIDGE_MODE_VEPA ?
4999					       PORT_FWD_TYPE_VEPA :
5000					       PORT_FWD_TYPE_VEB, 0);
5001		if (status)
5002			goto err;
5003
5004		dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
5005			 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5006
5007		return status;
5008	}
5009err:
5010	dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
5011		mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5012
5013	return status;
5014}
5015
5016static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
5017				 struct net_device *dev, u32 filter_mask,
5018				 int nlflags)
5019{
5020	struct be_adapter *adapter = netdev_priv(dev);
5021	int status = 0;
5022	u8 hsw_mode;
5023
5024	/* BE and Lancer chips support VEB mode only */
5025	if (BEx_chip(adapter) || lancer_chip(adapter)) {
5026		/* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
5027		if (!pci_sriov_get_totalvfs(adapter->pdev))
5028			return 0;
5029		hsw_mode = PORT_FWD_TYPE_VEB;
5030	} else {
5031		status = be_cmd_get_hsw_config(adapter, NULL, 0,
5032					       adapter->if_handle, &hsw_mode,
5033					       NULL);
5034		if (status)
5035			return 0;
5036
5037		if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
5038			return 0;
5039	}
5040
5041	return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
5042				       hsw_mode == PORT_FWD_TYPE_VEPA ?
5043				       BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
5044				       0, 0, nlflags, filter_mask, NULL);
5045}
5046
5047static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
5048					 void (*func)(struct work_struct *))
5049{
5050	struct be_cmd_work *work;
5051
5052	work = kzalloc(sizeof(*work), GFP_ATOMIC);
5053	if (!work) {
5054		dev_err(&adapter->pdev->dev,
5055			"be_work memory allocation failed\n");
5056		return NULL;
5057	}
5058
5059	INIT_WORK(&work->work, func);
5060	work->adapter = adapter;
5061	return work;
5062}
5063
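/* Per-skb offload fixups: drop TSO/GSO for cases the HW cannot handle and
 * restrict csum/GSO offloads to VxLAN traffic when VxLAN offloads are on.
 */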
5064static netdev_features_t be_features_check(struct sk_buff *skb,
5065					   struct net_device *dev,
5066					   netdev_features_t features)
5067{
5068	struct be_adapter *adapter = netdev_priv(dev);
5069	u8 l4_hdr = 0;
5070
5071	if (skb_is_gso(skb)) {
5072		/* IPv6 TSO requests with extension hdrs are a problem
5073		 * for Lancer and BE3 HW. Disable the TSO6 feature.
5074		 */
5075		if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5076			features &= ~NETIF_F_TSO6;
5077
5078		/* Lancer cannot handle a packet with an MSS less than 256,
5079		 * nor a TSO packet with a single segment.
5080		 * Disable GSO support in such cases.
5081		 */
5082		if (lancer_chip(adapter) &&
5083		    (skb_shinfo(skb)->gso_size < 256 ||
5084		     skb_shinfo(skb)->gso_segs == 1))
5085			features &= ~NETIF_F_GSO_MASK;
5086	}
5087
5088	/* The code below restricts offload features for some tunneled and
5089	 * Q-in-Q packets.
5090	 * Offload features for normal (non tunnel) packets are unchanged.
5091	 */
5092	features = vlan_features_check(skb, features);
5093	if (!skb->encapsulation ||
5094	    !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5095		return features;
5096
5097	/* It's an encapsulated packet and VxLAN offloads are enabled. We
5098	 * should disable tunnel offload features if it's not a VxLAN packet,
5099	 * as tunnel offloads have been enabled only for VxLAN. This is done to
5100	 * allow other tunneled traffic like GRE to work fine while VxLAN
5101	 * offloads are configured in Skyhawk-R.
5102	 */
5103	switch (vlan_get_protocol(skb)) {
5104	case htons(ETH_P_IP):
5105		l4_hdr = ip_hdr(skb)->protocol;
5106		break;
5107	case htons(ETH_P_IPV6):
5108		l4_hdr = ipv6_hdr(skb)->nexthdr;
5109		break;
5110	default:
5111		return features;
5112	}
5113
5114	if (l4_hdr != IPPROTO_UDP ||
5115	    skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5116	    skb->inner_protocol != htons(ETH_P_TEB) ||
5117	    skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5118		sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5119	    !adapter->vxlan_port ||
5120	    udp_hdr(skb)->dest != adapter->vxlan_port)
5121		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5122
5123	return features;
5124}
5125
5126static int be_get_phys_port_id(struct net_device *dev,
5127			       struct netdev_phys_item_id *ppid)
5128{
5129	int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5130	struct be_adapter *adapter = netdev_priv(dev);
5131	u8 *id;
5132
5133	if (MAX_PHYS_ITEM_ID_LEN < id_len)
5134		return -ENOSPC;
5135
5136	ppid->id[0] = adapter->hba_port_num + 1;
5137	id = &ppid->id[1];
5138	for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5139	     i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5140		memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5141
5142	ppid->id_len = id_len;
5143
5144	return 0;
5145}
5146
5147static void be_set_rx_mode(struct net_device *dev)
5148{
5149	struct be_adapter *adapter = netdev_priv(dev);
5150	struct be_cmd_work *work;
5151
5152	work = be_alloc_work(adapter, be_work_set_rx_mode);
5153	if (work)
5154		queue_work(be_wq, &work->work);
5155}
5156
5157static const struct net_device_ops be_netdev_ops = {
5158	.ndo_open		= be_open,
5159	.ndo_stop		= be_close,
5160	.ndo_start_xmit		= be_xmit,
5161	.ndo_set_rx_mode	= be_set_rx_mode,
5162	.ndo_set_mac_address	= be_mac_addr_set,
5163	.ndo_get_stats64	= be_get_stats64,
5164	.ndo_validate_addr	= eth_validate_addr,
5165	.ndo_vlan_rx_add_vid	= be_vlan_add_vid,
5166	.ndo_vlan_rx_kill_vid	= be_vlan_rem_vid,
5167	.ndo_set_vf_mac		= be_set_vf_mac,
5168	.ndo_set_vf_vlan	= be_set_vf_vlan,
5169	.ndo_set_vf_rate	= be_set_vf_tx_rate,
5170	.ndo_get_vf_config	= be_get_vf_config,
5171	.ndo_set_vf_link_state  = be_set_vf_link_state,
5172	.ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5173	.ndo_tx_timeout		= be_tx_timeout,
5174#ifdef CONFIG_NET_POLL_CONTROLLER
5175	.ndo_poll_controller	= be_netpoll,
5176#endif
5177	.ndo_bridge_setlink	= be_ndo_bridge_setlink,
5178	.ndo_bridge_getlink	= be_ndo_bridge_getlink,
5179	.ndo_features_check	= be_features_check,
5180	.ndo_get_phys_port_id   = be_get_phys_port_id,
5181};
5182
5183static void be_netdev_init(struct net_device *netdev)
5184{
5185	struct be_adapter *adapter = netdev_priv(netdev);
5186
5187	netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5188		NETIF_F_GSO_UDP_TUNNEL |
5189		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5190		NETIF_F_HW_VLAN_CTAG_TX;
5191	if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5192		netdev->hw_features |= NETIF_F_RXHASH;
5193
5194	netdev->features |= netdev->hw_features |
5195		NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER |
5196		NETIF_F_HIGHDMA;
5197
5198	netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5199		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5200
5201	netdev->priv_flags |= IFF_UNICAST_FLT;
5202
5203	netdev->flags |= IFF_MULTICAST;
5204
5205	netif_set_tso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5206
5207	netdev->netdev_ops = &be_netdev_ops;
5208
5209	netdev->ethtool_ops = &be_ethtool_ops;
5210
5211	if (!lancer_chip(adapter) && !BEx_chip(adapter) && !be_is_mc(adapter))
5212		netdev->udp_tunnel_nic_info = &be_udp_tunnels;
5213
5214	/* MTU range: 256 - 9000 */
5215	netdev->min_mtu = BE_MIN_MTU;
5216	netdev->max_mtu = BE_MAX_MTU;
5217}
5218
5219static void be_cleanup(struct be_adapter *adapter)
5220{
5221	struct net_device *netdev = adapter->netdev;
5222
5223	rtnl_lock();
5224	netif_device_detach(netdev);
5225	if (netif_running(netdev))
5226		be_close(netdev);
5227	rtnl_unlock();
5228
5229	be_clear(adapter);
5230}
5231
5232static int be_resume(struct be_adapter *adapter)
5233{
5234	struct net_device *netdev = adapter->netdev;
5235	int status;
5236
5237	status = be_setup(adapter);
5238	if (status)
5239		return status;
5240
5241	rtnl_lock();
5242	if (netif_running(netdev))
5243		status = be_open(netdev);
5244	rtnl_unlock();
5245
5246	if (status)
5247		return status;
5248
5249	netif_device_attach(netdev);
5250
5251	return 0;
5252}
5253
5254static void be_soft_reset(struct be_adapter *adapter)
5255{
5256	u32 val;
5257
5258	dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5259	val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5260	val |= SLIPORT_SOFTRESET_SR_MASK;
5261	iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5262}
5263
5264static bool be_err_is_recoverable(struct be_adapter *adapter)
5265{
5266	struct be_error_recovery *err_rec = &adapter->error_recovery;
5267	unsigned long initial_idle_time =
5268		msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5269	unsigned long recovery_interval =
5270		msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5271	u16 ue_err_code;
5272	u32 val;
5273
5274	val = be_POST_stage_get(adapter);
5275	if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5276		return false;
5277	ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5278	if (ue_err_code == 0)
5279		return false;
5280
5281	dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5282		ue_err_code);
5283
5284	if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5285		dev_err(&adapter->pdev->dev,
5286			"Cannot recover within %lu sec from driver load\n",
5287			jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5288		return false;
5289	}
5290
5291	if (err_rec->last_recovery_time && time_before_eq(
5292		jiffies - err_rec->last_recovery_time, recovery_interval)) {
5293		dev_err(&adapter->pdev->dev,
5294			"Cannot recover within %lu sec from last recovery\n",
5295			jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5296		return false;
5297	}
5298
5299	if (ue_err_code == err_rec->last_err_code) {
5300		dev_err(&adapter->pdev->dev,
5301			"Cannot recover from a consecutive TPE error\n");
5302		return false;
5303	}
5304
5305	err_rec->last_recovery_time = jiffies;
5306	err_rec->last_err_code = ue_err_code;
5307	return true;
5308}
5309
5310static int be_tpe_recover(struct be_adapter *adapter)
5311{
5312	struct be_error_recovery *err_rec = &adapter->error_recovery;
5313	int status = -EAGAIN;
5314	u32 val;
5315
5316	switch (err_rec->recovery_state) {
5317	case ERR_RECOVERY_ST_NONE:
5318		err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5319		err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5320		break;
5321
5322	case ERR_RECOVERY_ST_DETECT:
5323		val = be_POST_stage_get(adapter);
5324		if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5325		    POST_STAGE_RECOVERABLE_ERR) {
5326			dev_err(&adapter->pdev->dev,
5327				"Unrecoverable HW error detected: 0x%x\n", val);
5328			status = -EINVAL;
5329			err_rec->resched_delay = 0;
5330			break;
5331		}
5332
5333		dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5334
5335		/* Only PF0 initiates a Chip Soft Reset. But PF0 must wait UE2SR
5336		 * milliseconds before it checks the final error status in
5337		 * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5338		 * If they are, PF0 initiates a Soft Reset.
5339		 */
5340		if (adapter->pf_num == 0) {
5341			err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5342			err_rec->resched_delay = err_rec->ue_to_reset_time -
5343					ERR_RECOVERY_UE_DETECT_DURATION;
5344			break;
5345		}
5346
5347		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5348		err_rec->resched_delay = err_rec->ue_to_poll_time -
5349					ERR_RECOVERY_UE_DETECT_DURATION;
5350		break;
5351
5352	case ERR_RECOVERY_ST_RESET:
5353		if (!be_err_is_recoverable(adapter)) {
5354			dev_err(&adapter->pdev->dev,
5355				"Failed to meet recovery criteria\n");
5356			status = -EIO;
5357			err_rec->resched_delay = 0;
5358			break;
5359		}
5360		be_soft_reset(adapter);
5361		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5362		err_rec->resched_delay = err_rec->ue_to_poll_time -
5363					err_rec->ue_to_reset_time;
5364		break;
5365
5366	case ERR_RECOVERY_ST_PRE_POLL:
5367		err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5368		err_rec->resched_delay = 0;
5369		status = 0;			/* done */
5370		break;
5371
5372	default:
5373		status = -EINVAL;
5374		err_rec->resched_delay = 0;
5375		break;
5376	}
5377
5378	return status;
5379}
5380
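/* Recover the adapter after a HW error: run the TPE recovery state machine
 * on BE/Skyhawk, wait for FW to become ready, then tear down and re-create
 * the function's resources via be_cleanup()/be_resume().
 */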
5381static int be_err_recover(struct be_adapter *adapter)
5382{
5383	int status;
5384
5385	if (!lancer_chip(adapter)) {
5386		if (!adapter->error_recovery.recovery_supported ||
5387		    adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5388			return -EIO;
5389		status = be_tpe_recover(adapter);
5390		if (status)
5391			goto err;
5392	}
5393
5394	/* Wait for adapter to reach quiescent state before
5395	 * destroying queues
5396	 */
5397	status = be_fw_wait_ready(adapter);
5398	if (status)
5399		goto err;
5400
5401	adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5402
5403	be_cleanup(adapter);
5404
5405	status = be_resume(adapter);
5406	if (status)
5407		goto err;
5408
5409	adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5410
5411err:
5412	return status;
5413}
5414
5415static void be_err_detection_task(struct work_struct *work)
5416{
5417	struct be_error_recovery *err_rec =
5418			container_of(work, struct be_error_recovery,
5419				     err_detection_work.work);
5420	struct be_adapter *adapter =
5421			container_of(err_rec, struct be_adapter,
5422				     error_recovery);
5423	u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5424	struct device *dev = &adapter->pdev->dev;
5425	int recovery_status;
5426
5427	be_detect_error(adapter);
5428	if (!be_check_error(adapter, BE_ERROR_HW))
5429		goto reschedule_task;
5430
5431	recovery_status = be_err_recover(adapter);
5432	if (!recovery_status) {
5433		err_rec->recovery_retries = 0;
5434		err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5435		dev_info(dev, "Adapter recovery successful\n");
5436		goto reschedule_task;
5437	} else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5438		/* BEx/SH recovery state machine */
5439		if (adapter->pf_num == 0 &&
5440		    err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5441			dev_err(&adapter->pdev->dev,
5442				"Adapter recovery in progress\n");
5443		resched_delay = err_rec->resched_delay;
5444		goto reschedule_task;
5445	} else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5446		/* For VFs, check every second whether the PF has
5447		 * allocated resources.
5448		 */
5449		dev_err(dev, "Re-trying adapter recovery\n");
5450		goto reschedule_task;
5451	} else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5452		   ERR_RECOVERY_MAX_RETRY_COUNT) {
5453		/* In case of another error during recovery, it takes 30 sec
5454		 * for the adapter to come out of the error state. Retry error
5455		 * recovery after this time interval.
5456		 */
5457		dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5458		resched_delay = ERR_RECOVERY_RETRY_DELAY;
5459		goto reschedule_task;
5460	} else {
5461		dev_err(dev, "Adapter recovery failed\n");
5462		dev_err(dev, "Please reboot server to recover\n");
5463	}
5464
5465	return;
5466
5467reschedule_task:
5468	be_schedule_err_detection(adapter, resched_delay);
5469}
5470
5471static void be_log_sfp_info(struct be_adapter *adapter)
5472{
5473	int status;
5474
5475	status = be_cmd_query_sfp_info(adapter);
5476	if (!status) {
5477		dev_err(&adapter->pdev->dev,
5478			"Port %c: %s Vendor: %s part no: %s",
5479			adapter->port_name,
5480			be_misconfig_evt_port_state[adapter->phy_state],
5481			adapter->phy.vendor_name,
5482			adapter->phy.vendor_pn);
5483	}
5484	adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5485}
5486
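/* Periodic (1 sec) housekeeping: poll die temperature, reap MCC completions
 * while the interface is down, issue stats requests, replenish starved RX
 * queues and update EQ delays and SFP state as needed.
 */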
5487static void be_worker(struct work_struct *work)
5488{
5489	struct be_adapter *adapter =
5490		container_of(work, struct be_adapter, work.work);
5491	struct be_rx_obj *rxo;
5492	int i;
5493
5494	if (be_physfn(adapter) &&
5495	    MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5496		be_cmd_get_die_temperature(adapter);
5497
5498	/* when interrupts are not yet enabled, just reap any pending
5499	 * mcc completions
5500	 */
5501	if (!netif_running(adapter->netdev)) {
5502		local_bh_disable();
5503		be_process_mcc(adapter);
5504		local_bh_enable();
5505		goto reschedule;
5506	}
5507
5508	if (!adapter->stats_cmd_sent) {
5509		if (lancer_chip(adapter))
5510			lancer_cmd_get_pport_stats(adapter,
5511						   &adapter->stats_cmd);
5512		else
5513			be_cmd_get_stats(adapter, &adapter->stats_cmd);
5514	}
5515
5516	for_all_rx_queues(adapter, rxo, i) {
5517		/* Replenish RX-queues starved due to memory
5518		 * allocation failures.
5519		 */
5520		if (rxo->rx_post_starved)
5521			be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5522	}
5523
5524	/* EQ-delay update for Skyhawk is done while notifying EQ */
5525	if (!skyhawk_chip(adapter))
5526		be_eqd_update(adapter, false);
5527
5528	if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5529		be_log_sfp_info(adapter);
5530
5531reschedule:
5532	adapter->work_counter++;
5533	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5534}
5535
5536static void be_unmap_pci_bars(struct be_adapter *adapter)
5537{
5538	if (adapter->csr)
5539		pci_iounmap(adapter->pdev, adapter->csr);
5540	if (adapter->db)
5541		pci_iounmap(adapter->pdev, adapter->db);
5542	if (adapter->pcicfg && adapter->pcicfg_mapped)
5543		pci_iounmap(adapter->pdev, adapter->pcicfg);
5544}
5545
static int db_bar(struct be_adapter *adapter)
{
	if (lancer_chip(adapter) || be_virtfn(adapter))
		return 0;
	else
		return 4;
}

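/* Record the location of the RoCE doorbell region within the doorbell BAR
 * for use by the RoCE driver; only Skyhawk supports RoCE.
 */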
static int be_roce_map_pci_bars(struct be_adapter *adapter)
{
	if (skyhawk_chip(adapter)) {
		adapter->roce_db.size = 4096;
		adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
							      db_bar(adapter));
		adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
							       db_bar(adapter));
	}
	return 0;
}

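/* Map the CSR (BEx PF only), doorbell and PCICFG regions needed by the
 * driver.
 */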
static int be_map_pci_bars(struct be_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	u8 __iomem *addr;
	u32 sli_intf;

	pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
	adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
				SLI_INTF_FAMILY_SHIFT;
	adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;

	if (BEx_chip(adapter) && be_physfn(adapter)) {
		adapter->csr = pci_iomap(pdev, 2, 0);
		if (!adapter->csr)
			return -ENOMEM;
	}

	addr = pci_iomap(pdev, db_bar(adapter), 0);
	if (!addr)
		goto pci_map_err;
	adapter->db = addr;

	if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
		if (be_physfn(adapter)) {
			/* PCICFG is the 2nd BAR in BE2 */
			addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
			if (!addr)
				goto pci_map_err;
			adapter->pcicfg = addr;
			adapter->pcicfg_mapped = true;
		} else {
			adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
			adapter->pcicfg_mapped = false;
		}
	}

	be_roce_map_pci_bars(adapter);
	return 0;

pci_map_err:
	dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
	be_unmap_pci_bars(adapter);
	return -ENOMEM;
}

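/* Free the DMA-coherent buffers allocated in be_drv_init() */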
static void be_drv_cleanup(struct be_adapter *adapter)
{
	struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
	struct device *dev = &adapter->pdev->dev;

	if (mem->va)
		dma_free_coherent(dev, mem->size, mem->va, mem->dma);

	mem = &adapter->rx_filter;
	if (mem->va)
		dma_free_coherent(dev, mem->size, mem->va, mem->dma);

	mem = &adapter->stats_cmd;
	if (mem->va)
		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
}

/* Allocate and initialize various fields in be_adapter struct */
static int be_drv_init(struct be_adapter *adapter)
{
	struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
	struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
	struct be_dma_mem *rx_filter = &adapter->rx_filter;
	struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
	struct device *dev = &adapter->pdev->dev;
	int status = 0;

	mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
	mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size,
						&mbox_mem_alloc->dma,
						GFP_KERNEL);
	if (!mbox_mem_alloc->va)
		return -ENOMEM;

	mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
	mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
	mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);

	rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
	rx_filter->va = dma_alloc_coherent(dev, rx_filter->size,
					   &rx_filter->dma, GFP_KERNEL);
	if (!rx_filter->va) {
		status = -ENOMEM;
		goto free_mbox;
	}

	if (lancer_chip(adapter))
		stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
	else if (BE2_chip(adapter))
		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
	else if (BE3_chip(adapter))
		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
	else
		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
	stats_cmd->va = dma_alloc_coherent(dev, stats_cmd->size,
					   &stats_cmd->dma, GFP_KERNEL);
	if (!stats_cmd->va) {
		status = -ENOMEM;
		goto free_rx_filter;
	}

	mutex_init(&adapter->mbox_lock);
	mutex_init(&adapter->mcc_lock);
	mutex_init(&adapter->rx_filter_lock);
	spin_lock_init(&adapter->mcc_cq_lock);
	init_completion(&adapter->et_cmd_compl);

	pci_save_state(adapter->pdev);

	INIT_DELAYED_WORK(&adapter->work, be_worker);

	adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
	adapter->error_recovery.resched_delay = 0;
	INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
			  be_err_detection_task);

	adapter->rx_fc = true;
	adapter->tx_fc = true;

	/* Must be a power of 2 or else MODULO will BUG_ON */
	adapter->be_get_temp_freq = 64;

	return 0;

free_rx_filter:
	dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
free_mbox:
	dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
			  mbox_mem_alloc->dma);
	return status;
}

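/* PCI remove callback: tear down RoCE, the net device, FW state and
 * PCI resources.
 */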
static void be_remove(struct pci_dev *pdev)
{
	struct be_adapter *adapter = pci_get_drvdata(pdev);

	if (!adapter)
		return;

	be_roce_dev_remove(adapter);
	be_intr_set(adapter, false);

	be_cancel_err_detection(adapter);

	unregister_netdev(adapter->netdev);

	be_clear(adapter);

	if (!pci_vfs_assigned(adapter->pdev))
		be_cmd_reset_function(adapter);

	/* Tell FW we're done issuing cmds */
	be_cmd_fw_clean(adapter);

	be_unmap_pci_bars(adapter);
	be_drv_cleanup(adapter);

	pci_release_regions(pdev);
	pci_disable_device(pdev);

	free_netdev(adapter->netdev);
}

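/* hwmon sysfs callback: report the last-read on-die temperature in
 * millidegrees Celsius, or -EIO if no valid reading is available.
 */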
static ssize_t be_hwmon_show_temp(struct device *dev,
				  struct device_attribute *dev_attr,
				  char *buf)
{
	struct be_adapter *adapter = dev_get_drvdata(dev);

	/* Unit: millidegree Celsius */
	if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
		return -EIO;
	else
		return sprintf(buf, "%u\n",
			       adapter->hwmon_info.be_on_die_temp * 1000);
}

static SENSOR_DEVICE_ATTR(temp1_input, 0444,
			  be_hwmon_show_temp, NULL, 1);

static struct attribute *be_hwmon_attrs[] = {
	&sensor_dev_attr_temp1_input.dev_attr.attr,
	NULL
};

ATTRIBUTE_GROUPS(be_hwmon);

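/* Return a printable name for the multi-channel mode the function is in */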
static char *mc_name(struct be_adapter *adapter)
{
	char *str = "";	/* default */

	switch (adapter->mc_type) {
	case UMC:
		str = "UMC";
		break;
	case FLEX10:
		str = "FLEX10";
		break;
	case vNIC1:
		str = "vNIC-1";
		break;
	case nPAR:
		str = "nPAR";
		break;
	case UFP:
		str = "UFP";
		break;
	case vNIC2:
		str = "vNIC-2";
		break;
	default:
		str = "";
	}

	return str;
}

static inline char *func_name(struct be_adapter *adapter)
{
	return be_physfn(adapter) ? "PF" : "VF";
}

static inline char *nic_name(struct pci_dev *pdev)
{
	switch (pdev->device) {
	case OC_DEVICE_ID1:
		return OC_NAME;
	case OC_DEVICE_ID2:
		return OC_NAME_BE;
	case OC_DEVICE_ID3:
	case OC_DEVICE_ID4:
		return OC_NAME_LANCER;
	case BE_DEVICE_ID2:
		return BE3_NAME;
	case OC_DEVICE_ID5:
	case OC_DEVICE_ID6:
		return OC_NAME_SH;
	default:
		return BE_NAME;
	}
}

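/* PCI probe callback: enable the device, map BARs, allocate driver state,
 * bring up the adapter and register the net device.
 */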
static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
{
	struct be_adapter *adapter;
	struct net_device *netdev;
	int status = 0;

	status = pci_enable_device(pdev);
	if (status)
		goto do_none;

	status = pci_request_regions(pdev, DRV_NAME);
	if (status)
		goto disable_dev;
	pci_set_master(pdev);

	netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
	if (!netdev) {
		status = -ENOMEM;
		goto rel_reg;
	}
	adapter = netdev_priv(netdev);
	adapter->pdev = pdev;
	pci_set_drvdata(pdev, adapter);
	adapter->netdev = netdev;
	SET_NETDEV_DEV(netdev, &pdev->dev);

	status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (status) {
		dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
		goto free_netdev;
	}

	status = be_map_pci_bars(adapter);
	if (status)
		goto free_netdev;

	status = be_drv_init(adapter);
	if (status)
		goto unmap_bars;

	status = be_setup(adapter);
	if (status)
		goto drv_cleanup;

	be_netdev_init(netdev);
	status = register_netdev(netdev);
	if (status != 0)
		goto unsetup;

	be_roce_dev_add(adapter);

	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
	adapter->error_recovery.probe_time = jiffies;

	/* On-die temperature is not supported on VFs */
	if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
		adapter->hwmon_info.hwmon_dev =
			devm_hwmon_device_register_with_groups(&pdev->dev,
							       DRV_NAME,
							       adapter,
							       be_hwmon_groups);
		adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
	}

	dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
		 func_name(adapter), mc_name(adapter), adapter->port_name);

	return 0;

unsetup:
	be_clear(adapter);
drv_cleanup:
	be_drv_cleanup(adapter);
unmap_bars:
	be_unmap_pci_bars(adapter);
free_netdev:
	free_netdev(netdev);
rel_reg:
	pci_release_regions(pdev);
disable_dev:
	pci_disable_device(pdev);
do_none:
	dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
	return status;
}

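/* PM suspend callback: quiesce the adapter before the system sleeps */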
static int __maybe_unused be_suspend(struct device *dev_d)
{
	struct be_adapter *adapter = dev_get_drvdata(dev_d);

	be_intr_set(adapter, false);
	be_cancel_err_detection(adapter);

	be_cleanup(adapter);

	return 0;
}

static int __maybe_unused be_pci_resume(struct device *dev_d)
{
	struct be_adapter *adapter = dev_get_drvdata(dev_d);
	int status = 0;

	status = be_resume(adapter);
	if (status)
		return status;

	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);

	return 0;
}

/*
 * An FLR will stop BE from DMAing any data.
 */
static void be_shutdown(struct pci_dev *pdev)
{
	struct be_adapter *adapter = pci_get_drvdata(pdev);

	if (!adapter)
		return;

	be_roce_dev_shutdown(adapter);
	cancel_delayed_work_sync(&adapter->work);
	be_cancel_err_detection(adapter);

	netif_device_detach(adapter->netdev);

	be_cmd_reset_function(adapter);

	pci_disable_device(pdev);
}

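/* EEH/AER callback: a PCI channel error was detected. Quiesce the adapter
 * and tell the PCI core whether a slot reset should be attempted.
 */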
static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
					    pci_channel_state_t state)
{
	struct be_adapter *adapter = pci_get_drvdata(pdev);

	dev_err(&adapter->pdev->dev, "EEH error detected\n");

	be_roce_dev_remove(adapter);

	if (!be_check_error(adapter, BE_ERROR_EEH)) {
		be_set_error(adapter, BE_ERROR_EEH);

		be_cancel_err_detection(adapter);

		be_cleanup(adapter);
	}

	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

	pci_disable_device(pdev);

	/* The error could cause the FW to trigger a flash debug dump.
	 * Resetting the card while a flash dump is in progress can cause
	 * it not to recover; wait for the dump to finish. Wait only on the
	 * first function, as this is needed only once per adapter.
	 */
	if (pdev->devfn == 0)
		ssleep(30);

	return PCI_ERS_RESULT_NEED_RESET;
}

static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
{
	struct be_adapter *adapter = pci_get_drvdata(pdev);
	int status;

	dev_info(&adapter->pdev->dev, "EEH reset\n");

	status = pci_enable_device(pdev);
	if (status)
		return PCI_ERS_RESULT_DISCONNECT;

	pci_set_master(pdev);
	pci_restore_state(pdev);

	/* Check if card is ok and fw is ready */
	dev_info(&adapter->pdev->dev,
		 "Waiting for FW to be ready after EEH reset\n");
	status = be_fw_wait_ready(adapter);
	if (status)
		return PCI_ERS_RESULT_DISCONNECT;

	be_clear_error(adapter, BE_CLEAR_ALL);
	return PCI_ERS_RESULT_RECOVERED;
}

static void be_eeh_resume(struct pci_dev *pdev)
{
	int status = 0;
	struct be_adapter *adapter = pci_get_drvdata(pdev);

	dev_info(&adapter->pdev->dev, "EEH resume\n");

	pci_save_state(pdev);

	status = be_resume(adapter);
	if (status)
		goto err;

	be_roce_dev_add(adapter);

	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
	return;
err:
	dev_err(&adapter->pdev->dev, "EEH resume failed\n");
}

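/* sysfs sriov_numvfs handler: enable/disable num_vfs VFs, redistributing
 * PF-pool resources across the requested number of VFs where the FW
 * supports it (Skyhawk).
 */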
static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
{
	struct be_adapter *adapter = pci_get_drvdata(pdev);
	struct be_resources vft_res = {0};
	int status;

	if (!num_vfs)
		be_vf_clear(adapter);

	adapter->num_vfs = num_vfs;

	if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
		dev_warn(&pdev->dev,
			 "Cannot disable VFs while they are assigned\n");
		return -EBUSY;
	}

	/* When the HW is in an SRIOV-capable configuration, the PF-pool
	 * resources are distributed equally across the max number of VFs.
	 * The user may request that only a subset of the max VFs be enabled.
	 * Based on num_vfs, redistribute the resources across num_vfs so
	 * that each VF has access to a larger share of resources.
	 * This facility is not available in BE3 FW.
	 * Also, this is done by the FW in the Lancer chip.
	 */
	if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
		be_calculate_vf_res(adapter, adapter->num_vfs,
				    &vft_res);
		status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
						 adapter->num_vfs, &vft_res);
		if (status)
			dev_err(&pdev->dev,
				"Failed to optimize SR-IOV resources\n");
	}

	status = be_get_resources(adapter);
	if (status)
		return be_cmd_status(status);

	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
	rtnl_lock();
	status = be_update_queues(adapter);
	rtnl_unlock();
	if (status)
		return be_cmd_status(status);

	if (adapter->num_vfs)
		status = be_vf_setup(adapter);

	if (!status)
		return adapter->num_vfs;

	return 0;
}

static const struct pci_error_handlers be_eeh_handlers = {
	.error_detected = be_eeh_err_detected,
	.slot_reset = be_eeh_reset,
	.resume = be_eeh_resume,
};

static SIMPLE_DEV_PM_OPS(be_pci_pm_ops, be_suspend, be_pci_resume);

static struct pci_driver be_driver = {
	.name = DRV_NAME,
	.id_table = be_dev_ids,
	.probe = be_probe,
	.remove = be_remove,
	.driver.pm = &be_pci_pm_ops,
	.shutdown = be_shutdown,
	.sriov_configure = be_pci_sriov_configure,
	.err_handler = &be_eeh_handlers
};

static int __init be_init_module(void)
{
	int status;

	if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
	    rx_frag_size != 2048) {
		printk(KERN_WARNING DRV_NAME
			" : Module param rx_frag_size must be 2048/4096/8192."
			" Using 2048\n");
		rx_frag_size = 2048;
	}

	if (num_vfs > 0) {
		pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
		pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
	}

	be_wq = create_singlethread_workqueue("be_wq");
	if (!be_wq) {
		pr_warn(DRV_NAME " : workqueue creation failed\n");
		return -ENOMEM;
	}

	be_err_recovery_workq =
		create_singlethread_workqueue("be_err_recover");
	if (!be_err_recovery_workq)
		pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");

	status = pci_register_driver(&be_driver);
	if (status) {
		destroy_workqueue(be_wq);
		be_destroy_err_recovery_workq();
	}
	return status;
}
module_init(be_init_module);

static void __exit be_exit_module(void)
{
	pci_unregister_driver(&be_driver);

	be_destroy_err_recovery_workq();

	if (be_wq)
		destroy_workqueue(be_wq);
}
module_exit(be_exit_module);