/*************************************************************************
 * myri10ge.c: Myricom Myri-10G Ethernet driver.
 *
 * Copyright (C) 2005 - 2009 Myricom, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of Myricom, Inc. nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 *
 * If the eeprom on your board is not recent enough, you will need to get a
 * newer firmware image at:
 *   http://www.myri.com/scs/download-Myri10GE.html
 *
 * Contact Information:
 *   <help@myri.com>
 *   Myricom, Inc., 325N Santa Anita Avenue, Arcadia, CA 91006
 *************************************************************************/

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/tcp.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/inet_lro.h>
#include <linux/dca.h>
#include <linux/ip.h>
#include <linux/inet.h>
#include <linux/in.h>
#include <linux/ethtool.h>
#include <linux/firmware.h>
#include <linux/delay.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/crc32.h>
#include <linux/moduleparam.h>
#include <linux/io.h>
#include <linux/log2.h>
#include <linux/slab.h>
#include <net/checksum.h>
#include <net/ip.h>
#include <net/tcp.h>
#include <asm/byteorder.h>
#include <asm/io.h>
#include <asm/processor.h>
#ifdef CONFIG_MTRR
#include <asm/mtrr.h>
#endif

#include "myri10ge_mcp.h"
#include "myri10ge_mcp_gen_header.h"

#define MYRI10GE_VERSION_STR "1.5.2-1.459"

MODULE_DESCRIPTION("Myricom 10G driver (10GbE)");
MODULE_AUTHOR("Maintainer: help@myri.com");
MODULE_VERSION(MYRI10GE_VERSION_STR);
MODULE_LICENSE("Dual BSD/GPL");

#define MYRI10GE_MAX_ETHER_MTU 9014

#define MYRI10GE_ETH_STOPPED 0
#define MYRI10GE_ETH_STOPPING 1
#define MYRI10GE_ETH_STARTING 2
#define MYRI10GE_ETH_RUNNING 3
#define MYRI10GE_ETH_OPEN_FAILED 4

#define MYRI10GE_EEPROM_STRINGS_SIZE 256
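/* Worst case for a 64KB TSO send: the frame is cut at 2048-byte
 * boundaries (the most restrictive tx_boundary), and each such
 * chunk may consume up to two send descriptors. */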
#define MYRI10GE_MAX_SEND_DESC_TSO ((65536 / 2048) * 2)
#define MYRI10GE_MAX_LRO_DESCRIPTORS 8
#define MYRI10GE_LRO_MAX_PKTS 64

#define MYRI10GE_NO_CONFIRM_DATA htonl(0xffffffff)
#define MYRI10GE_NO_RESPONSE_RESULT 0xffffffff

#define MYRI10GE_ALLOC_ORDER 0
#define MYRI10GE_ALLOC_SIZE ((1 << MYRI10GE_ALLOC_ORDER) * PAGE_SIZE)
#define MYRI10GE_MAX_FRAGS_PER_FRAME (MYRI10GE_MAX_ETHER_MTU/MYRI10GE_ALLOC_SIZE + 1)

#define MYRI10GE_MAX_SLICES 32

struct myri10ge_rx_buffer_state {
	struct page *page;
	int page_offset;
	DEFINE_DMA_UNMAP_ADDR(bus);
	DEFINE_DMA_UNMAP_LEN(len);
};

struct myri10ge_tx_buffer_state {
	struct sk_buff *skb;
	int last;
	DEFINE_DMA_UNMAP_ADDR(bus);
	DEFINE_DMA_UNMAP_LEN(len);
};

struct myri10ge_cmd {
	u32 data0;
	u32 data1;
	u32 data2;
};

struct myri10ge_rx_buf {
	struct mcp_kreq_ether_recv __iomem *lanai;	/* lanai ptr for recv ring */
	struct mcp_kreq_ether_recv *shadow;	/* host shadow of recv ring */
	struct myri10ge_rx_buffer_state *info;
	struct page *page;
	dma_addr_t bus;
	int page_offset;
	int cnt;
	int fill_cnt;
	int alloc_fail;
	int mask;		/* number of rx slots -1 */
	int watchdog_needed;
};

struct myri10ge_tx_buf {
	struct mcp_kreq_ether_send __iomem *lanai;	/* lanai ptr for sendq */
	__be32 __iomem *send_go;	/* "go" doorbell ptr */
	__be32 __iomem *send_stop;	/* "stop" doorbell ptr */
	struct mcp_kreq_ether_send *req_list;	/* host shadow of sendq */
	char *req_bytes;
	struct myri10ge_tx_buffer_state *info;
	int mask;		/* number of transmit slots -1  */
	int req ____cacheline_aligned;	/* transmit slots submitted     */
	int pkt_start;		/* packets started */
	int stop_queue;
	int linearized;
	int done ____cacheline_aligned;	/* transmit slots completed     */
	int pkt_done;		/* packets completed */
	int wake_queue;
	int queue_active;
};

struct myri10ge_rx_done {
	struct mcp_slot *entry;
	dma_addr_t bus;
	int cnt;
	int idx;
	struct net_lro_mgr lro_mgr;
	struct net_lro_desc lro_desc[MYRI10GE_MAX_LRO_DESCRIPTORS];
};

struct myri10ge_slice_netstats {
	unsigned long rx_packets;
	unsigned long tx_packets;
	unsigned long rx_bytes;
	unsigned long tx_bytes;
	unsigned long rx_dropped;
	unsigned long tx_dropped;
};

struct myri10ge_slice_state {
	struct myri10ge_tx_buf tx;	/* transmit ring        */
	struct myri10ge_rx_buf rx_small;
	struct myri10ge_rx_buf rx_big;
	struct myri10ge_rx_done rx_done;
	struct net_device *dev;
	struct napi_struct napi;
	struct myri10ge_priv *mgp;
	struct myri10ge_slice_netstats stats;
	__be32 __iomem *irq_claim;
	struct mcp_irq_data *fw_stats;
	dma_addr_t fw_stats_bus;
	int watchdog_tx_done;
	int watchdog_tx_req;
	int watchdog_rx_done;
#ifdef CONFIG_MYRI10GE_DCA
	int cached_dca_tag;
	int cpu;
	__be32 __iomem *dca_tag;
#endif
	char irq_desc[32];
};

struct myri10ge_priv {
	struct myri10ge_slice_state *ss;
	int tx_boundary;	/* boundary transmits cannot cross */
	int num_slices;
	int running;		/* running?             */
	int csum_flag;		/* rx_csums?            */
	int small_bytes;
	int big_bytes;
	int max_intr_slots;
	struct net_device *dev;
	spinlock_t stats_lock;
	u8 __iomem *sram;
	int sram_size;
	unsigned long board_span;
	unsigned long iomem_base;
	__be32 __iomem *irq_deassert;
	char *mac_addr_string;
	struct mcp_cmd_response *cmd;
	dma_addr_t cmd_bus;
	struct pci_dev *pdev;
	int msi_enabled;
	int msix_enabled;
	struct msix_entry *msix_vectors;
#ifdef CONFIG_MYRI10GE_DCA
	int dca_enabled;
#endif
	u32 link_state;
	unsigned int rdma_tags_available;
	int intr_coal_delay;
	__be32 __iomem *intr_coal_delay_ptr;
	int mtrr;
	int wc_enabled;
	int down_cnt;
	wait_queue_head_t down_wq;
	struct work_struct watchdog_work;
	struct timer_list watchdog_timer;
	int watchdog_resets;
	int watchdog_pause;
	int pause;
	bool fw_name_allocated;
	char *fw_name;
	char eeprom_strings[MYRI10GE_EEPROM_STRINGS_SIZE];
	char *product_code_string;
	char fw_version[128];
	int fw_ver_major;
	int fw_ver_minor;
	int fw_ver_tiny;
	int adopted_rx_filter_bug;
	u8 mac_addr[6];		/* eeprom mac address */
	unsigned long serial_number;
	int vendor_specific_offset;
	int fw_multicast_support;
	unsigned long features;
	u32 max_tso6;
	u32 read_dma;
	u32 write_dma;
	u32 read_write_dma;
	u32 link_changes;
	u32 msg_enable;
	unsigned int board_number;
	int rebooted;
};
265
static char *myri10ge_fw_unaligned = "myri10ge_ethp_z8e.dat";
static char *myri10ge_fw_aligned = "myri10ge_eth_z8e.dat";
static char *myri10ge_fw_rss_unaligned = "myri10ge_rss_ethp_z8e.dat";
static char *myri10ge_fw_rss_aligned = "myri10ge_rss_eth_z8e.dat";
MODULE_FIRMWARE("myri10ge_ethp_z8e.dat");
MODULE_FIRMWARE("myri10ge_eth_z8e.dat");
MODULE_FIRMWARE("myri10ge_rss_ethp_z8e.dat");
MODULE_FIRMWARE("myri10ge_rss_eth_z8e.dat");

/* Careful: must be accessed under kparam_block_sysfs_write */
static char *myri10ge_fw_name = NULL;
module_param(myri10ge_fw_name, charp, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(myri10ge_fw_name, "Firmware image name");
#define MYRI10GE_MAX_BOARDS 8
static char *myri10ge_fw_names[MYRI10GE_MAX_BOARDS] =
    {[0 ... (MYRI10GE_MAX_BOARDS - 1)] = NULL };
module_param_array_named(myri10ge_fw_names, myri10ge_fw_names, charp, NULL,
			 0444);
MODULE_PARM_DESC(myri10ge_fw_names, "Firmware image names per board");
static int myri10ge_ecrc_enable = 1;
module_param(myri10ge_ecrc_enable, int, S_IRUGO);
MODULE_PARM_DESC(myri10ge_ecrc_enable, "Enable Extended CRC on PCI-E");

static int myri10ge_small_bytes = -1;	/* -1 == auto */
module_param(myri10ge_small_bytes, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(myri10ge_small_bytes, "Threshold of small packets");

static int myri10ge_msi = 1;	/* enable msi by default */
module_param(myri10ge_msi, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(myri10ge_msi, "Enable Message Signalled Interrupts");

static int myri10ge_intr_coal_delay = 75;
module_param(myri10ge_intr_coal_delay, int, S_IRUGO);
MODULE_PARM_DESC(myri10ge_intr_coal_delay, "Interrupt coalescing delay");

static int myri10ge_flow_control = 1;
module_param(myri10ge_flow_control, int, S_IRUGO);
MODULE_PARM_DESC(myri10ge_flow_control, "Pause parameter");

static int myri10ge_deassert_wait = 1;
module_param(myri10ge_deassert_wait, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(myri10ge_deassert_wait,
		 "Wait when deasserting legacy interrupts");

static int myri10ge_force_firmware = 0;
module_param(myri10ge_force_firmware, int, S_IRUGO);
MODULE_PARM_DESC(myri10ge_force_firmware,
		 "Force firmware to assume aligned completions");

static int myri10ge_initial_mtu = MYRI10GE_MAX_ETHER_MTU - ETH_HLEN;
module_param(myri10ge_initial_mtu, int, S_IRUGO);
MODULE_PARM_DESC(myri10ge_initial_mtu, "Initial MTU");

static int myri10ge_napi_weight = 64;
module_param(myri10ge_napi_weight, int, S_IRUGO);
MODULE_PARM_DESC(myri10ge_napi_weight, "Set NAPI weight");

static int myri10ge_watchdog_timeout = 1;
module_param(myri10ge_watchdog_timeout, int, S_IRUGO);
MODULE_PARM_DESC(myri10ge_watchdog_timeout, "Set watchdog timeout");

static int myri10ge_max_irq_loops = 1048576;
module_param(myri10ge_max_irq_loops, int, S_IRUGO);
MODULE_PARM_DESC(myri10ge_max_irq_loops,
		 "Set stuck legacy IRQ detection threshold");

#define MYRI10GE_MSG_DEFAULT NETIF_MSG_LINK

static int myri10ge_debug = -1;	/* defaults above */
module_param(myri10ge_debug, int, 0);
MODULE_PARM_DESC(myri10ge_debug, "Debug level (0=none,...,16=all)");

static int myri10ge_lro_max_pkts = MYRI10GE_LRO_MAX_PKTS;
module_param(myri10ge_lro_max_pkts, int, S_IRUGO);
MODULE_PARM_DESC(myri10ge_lro_max_pkts,
		 "Number of LRO packets to be aggregated");

static int myri10ge_fill_thresh = 256;
module_param(myri10ge_fill_thresh, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(myri10ge_fill_thresh, "Number of empty rx slots allowed");

static int myri10ge_reset_recover = 1;

static int myri10ge_max_slices = 1;
module_param(myri10ge_max_slices, int, S_IRUGO);
MODULE_PARM_DESC(myri10ge_max_slices, "Max tx/rx queues");

static int myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
module_param(myri10ge_rss_hash, int, S_IRUGO);
MODULE_PARM_DESC(myri10ge_rss_hash, "Type of RSS hashing to do");

static int myri10ge_dca = 1;
module_param(myri10ge_dca, int, S_IRUGO);
MODULE_PARM_DESC(myri10ge_dca, "Enable DCA if possible");

#define MYRI10GE_FW_OFFSET 1024*1024
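/* Split a DMA address into the 32-bit halves the firmware expects;
 * the high half is simply 0 when dma_addr_t is only 32 bits wide. */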
#define MYRI10GE_HIGHPART_TO_U32(X) \
(sizeof (X) == 8) ? ((u32)((u64)(X) >> 32)) : (0)
#define MYRI10GE_LOWPART_TO_U32(X) ((u32)(X))

#define myri10ge_pio_copy(to,from,size) __iowrite64_copy(to,from,size/8)

static void myri10ge_set_multicast_list(struct net_device *dev);
static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb,
					 struct net_device *dev);

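/* The value is already big-endian (the NIC's byte order), so use
 * __raw_writel() to avoid any byte swapping on the way out. */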
static inline void put_be32(__be32 val, __be32 __iomem * p)
{
	__raw_writel((__force __u32) val, (__force void __iomem *)p);
}

static struct net_device_stats *myri10ge_get_stats(struct net_device *dev);

static void set_fw_name(struct myri10ge_priv *mgp, char *name, bool allocated)
{
	if (mgp->fw_name_allocated)
		kfree(mgp->fw_name);
	mgp->fw_name = name;
	mgp->fw_name_allocated = allocated;
}

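/*
 * Issue a command to the firmware: an 8-byte-aligned mcp_cmd is
 * PIO-copied into the NIC's command window, and the NIC DMAs its
 * response into mgp->cmd in host memory, which we then poll.
 */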
static int
myri10ge_send_cmd(struct myri10ge_priv *mgp, u32 cmd,
		  struct myri10ge_cmd *data, int atomic)
{
	struct mcp_cmd *buf;
	char buf_bytes[sizeof(*buf) + 8];
	struct mcp_cmd_response *response = mgp->cmd;
	char __iomem *cmd_addr = mgp->sram + MXGEFW_ETH_CMD;
	u32 dma_low, dma_high, result, value;
	int sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (struct mcp_cmd *)ALIGN((unsigned long)buf_bytes, 8);

	buf->data0 = htonl(data->data0);
	buf->data1 = htonl(data->data1);
	buf->data2 = htonl(data->data2);
	buf->cmd = htonl(cmd);
	dma_low = MYRI10GE_LOWPART_TO_U32(mgp->cmd_bus);
	dma_high = MYRI10GE_HIGHPART_TO_U32(mgp->cmd_bus);

	buf->response_addr.low = htonl(dma_low);
	buf->response_addr.high = htonl(dma_high);
	response->result = htonl(MYRI10GE_NO_RESPONSE_RESULT);
	mb();
	myri10ge_pio_copy(cmd_addr, buf, sizeof(*buf));

	/* wait up to 15ms. Longest command is the DMA benchmark,
	 * which is capped at 5ms, but runs from a timeout handler
	 * that runs every 7.8ms. So a 15ms timeout leaves us with
	 * a 2.2ms margin
	 */
	if (atomic) {
		/* if atomic is set, do not sleep,
		 * and try to get the completion quickly
		 * (1ms will be enough for those commands) */
		for (sleep_total = 0;
		     sleep_total < 1000 &&
		     response->result == htonl(MYRI10GE_NO_RESPONSE_RESULT);
		     sleep_total += 10) {
			udelay(10);
			mb();
		}
	} else {
		/* use msleep for most commands */
		for (sleep_total = 0;
		     sleep_total < 15 &&
		     response->result == htonl(MYRI10GE_NO_RESPONSE_RESULT);
		     sleep_total++)
			msleep(1);
	}

	result = ntohl(response->result);
	value = ntohl(response->data);
	if (result != MYRI10GE_NO_RESPONSE_RESULT) {
		if (result == 0) {
			data->data0 = value;
			return 0;
		} else if (result == MXGEFW_CMD_UNKNOWN) {
			return -ENOSYS;
		} else if (result == MXGEFW_CMD_ERROR_UNALIGNED) {
			return -E2BIG;
		} else if (result == MXGEFW_CMD_ERROR_RANGE &&
			   cmd == MXGEFW_CMD_ENABLE_RSS_QUEUES &&
			   (data->data1 &
			    MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES) != 0) {
			return -ERANGE;
		} else {
			dev_err(&mgp->pdev->dev,
				"command %d failed, result = %d\n",
				cmd, result);
			return -ENXIO;
		}
	}

	dev_err(&mgp->pdev->dev, "command %d timed out, result = %d\n",
		cmd, result);
	return -EAGAIN;
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PT:ddd mmm xx xx:xx:xx xx\0
 * PV:ddd mmm xx xx:xx:xx xx\0
 */
static int myri10ge_read_mac_addr(struct myri10ge_priv *mgp)
{
	char *ptr, *limit;
	int i;

	ptr = mgp->eeprom_strings;
	limit = mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE;

	/* check the bound before dereferencing ptr */
	while (ptr < limit && *ptr != '\0') {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			ptr += 4;
			mgp->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				if ((ptr + 2) > limit)
					goto abort;
				mgp->mac_addr[i] =
				    simple_strtoul(ptr, &ptr, 16);
				ptr += 1;
			}
		}
		if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			mgp->product_code_string = ptr;
		}
		if (memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			mgp->serial_number = simple_strtoul(ptr, &ptr, 10);
		}
		while (ptr < limit && *ptr++) ;
	}

	return 0;

abort:
	dev_err(&mgp->pdev->dev, "failed to parse eeprom_strings\n");
	return -ENXIO;
}

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void myri10ge_dummy_rdma(struct myri10ge_priv *mgp, int enable)
{
	char __iomem *submit;
	__be32 buf[16] __attribute__ ((__aligned__(8)));
	u32 dma_low, dma_high;
	int i;

	/* clear confirmation addr */
	mgp->cmd->data = 0;
	mb();

	/* send an rdma command to the PCIe engine, and wait for the
	 * response in the confirmation address.  The firmware should
	 * write a -1 there to indicate it is alive and well
	 */
	dma_low = MYRI10GE_LOWPART_TO_U32(mgp->cmd_bus);
	dma_high = MYRI10GE_HIGHPART_TO_U32(mgp->cmd_bus);

	buf[0] = htonl(dma_high);	/* confirm addr MSW */
	buf[1] = htonl(dma_low);	/* confirm addr LSW */
	buf[2] = MYRI10GE_NO_CONFIRM_DATA;	/* confirm data */
	buf[3] = htonl(dma_high);	/* dummy addr MSW */
	buf[4] = htonl(dma_low);	/* dummy addr LSW */
	buf[5] = htonl(enable);	/* enable? */

	submit = mgp->sram + MXGEFW_BOOT_DUMMY_RDMA;

	myri10ge_pio_copy(submit, &buf, sizeof(buf));
	for (i = 0; mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA && i < 20; i++)
		msleep(1);
	if (mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA)
		dev_err(&mgp->pdev->dev, "dummy rdma %s failed\n",
			(enable ? "enable" : "disable"));
}

static int
myri10ge_validate_firmware(struct myri10ge_priv *mgp,
			   struct mcp_gen_header *hdr)
{
	struct device *dev = &mgp->pdev->dev;

	/* check firmware type */
	if (ntohl(hdr->mcp_type) != MCP_TYPE_ETH) {
		dev_err(dev, "Bad firmware type: 0x%x\n", ntohl(hdr->mcp_type));
		return -EINVAL;
	}

	/* save firmware version for ethtool */
	strncpy(mgp->fw_version, hdr->version, sizeof(mgp->fw_version));

	sscanf(mgp->fw_version, "%d.%d.%d", &mgp->fw_ver_major,
	       &mgp->fw_ver_minor, &mgp->fw_ver_tiny);

	if (!(mgp->fw_ver_major == MXGEFW_VERSION_MAJOR &&
	      mgp->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		dev_err(dev, "Found firmware version %s\n", mgp->fw_version);
		dev_err(dev, "Driver needs %d.%d\n", MXGEFW_VERSION_MAJOR,
			MXGEFW_VERSION_MINOR);
		return -EINVAL;
	}
	return 0;
}

static int myri10ge_load_hotplug_firmware(struct myri10ge_priv *mgp, u32 * size)
{
	unsigned crc, reread_crc;
	const struct firmware *fw;
	struct device *dev = &mgp->pdev->dev;
	unsigned char *fw_readback;
	struct mcp_gen_header *hdr;
	size_t hdr_offset;
	int status;
	unsigned i;

	if ((status = request_firmware(&fw, mgp->fw_name, dev)) < 0) {
		dev_err(dev, "Unable to load %s firmware image via hotplug\n",
			mgp->fw_name);
		status = -EINVAL;
		goto abort_with_nothing;
	}

	/* check size */

	if (fw->size >= mgp->sram_size - MYRI10GE_FW_OFFSET ||
	    fw->size < MCP_HEADER_PTR_OFFSET + 4) {
		dev_err(dev, "Firmware size invalid: %d\n", (int)fw->size);
		status = -EINVAL;
		goto abort_with_fw;
	}

	/* check id */
	hdr_offset = ntohl(*(__be32 *) (fw->data + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw->size) {
		dev_err(dev, "Bad firmware file\n");
		status = -EINVAL;
		goto abort_with_fw;
	}
	hdr = (void *)(fw->data + hdr_offset);

	status = myri10ge_validate_firmware(mgp, hdr);
	if (status != 0)
		goto abort_with_fw;

	crc = crc32(~0, fw->data, fw->size);
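	/* Copy the image into NIC SRAM 256 bytes at a time; the readb()
	 * after each burst flushes the posted PCI writes. */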
	for (i = 0; i < fw->size; i += 256) {
		myri10ge_pio_copy(mgp->sram + MYRI10GE_FW_OFFSET + i,
				  fw->data + i,
				  min(256U, (unsigned)(fw->size - i)));
		mb();
		readb(mgp->sram);
	}
	fw_readback = vmalloc(fw->size);
	if (!fw_readback) {
		status = -ENOMEM;
		goto abort_with_fw;
	}
	/* corruption checking is good for parity recovery and buggy chipsets */
	memcpy_fromio(fw_readback, mgp->sram + MYRI10GE_FW_OFFSET, fw->size);
	reread_crc = crc32(~0, fw_readback, fw->size);
	vfree(fw_readback);
	if (crc != reread_crc) {
		dev_err(dev, "CRC failed(fw-len=%u), got 0x%x (expect 0x%x)\n",
			(unsigned)fw->size, reread_crc, crc);
		status = -EIO;
		goto abort_with_fw;
	}
	*size = (u32) fw->size;

abort_with_fw:
	release_firmware(fw);

abort_with_nothing:
	return status;
}

static int myri10ge_adopt_running_firmware(struct myri10ge_priv *mgp)
{
	struct mcp_gen_header *hdr;
	struct device *dev = &mgp->pdev->dev;
	const size_t bytes = sizeof(struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = swab32(readl(mgp->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > mgp->sram_size) {
		dev_err(dev, "Running firmware has bad header offset (%d)\n",
			(int)hdr_offset);
		return -EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = kmalloc(bytes, GFP_KERNEL);
	if (hdr == NULL) {
		dev_err(dev, "could not malloc firmware hdr\n");
		return -ENOMEM;
	}
	memcpy_fromio(hdr, mgp->sram + hdr_offset, bytes);
	status = myri10ge_validate_firmware(mgp, hdr);
	kfree(hdr);

	/* check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode */
	if (mgp->fw_ver_major == 1 && mgp->fw_ver_minor == 4 &&
	    mgp->fw_ver_tiny >= 4 && mgp->fw_ver_tiny <= 11) {
		mgp->adopted_rx_filter_bug = 1;
		dev_warn(dev, "Adopting fw %d.%d.%d: "
			 "working around rx filter bug\n",
			 mgp->fw_ver_major, mgp->fw_ver_minor,
			 mgp->fw_ver_tiny);
	}
	return status;
}

static int myri10ge_get_firmware_capabilities(struct myri10ge_priv *mgp)
{
	struct myri10ge_cmd cmd;
	int status;

	/* probe for IPv6 TSO support */
	mgp->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE,
				   &cmd, 0);
	if (status == 0) {
		mgp->max_tso6 = cmd.data0;
		mgp->features |= NETIF_F_TSO6;
	}

	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd, 0);
	if (status != 0) {
		dev_err(&mgp->pdev->dev,
			"failed MXGEFW_CMD_GET_RX_RING_SIZE\n");
		return -ENXIO;
	}

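	/* The rx ring size (in bytes) bounds how many receive descriptors
	 * can be outstanding; allow one interrupt-queue slot per
	 * descriptor in each of the two rx rings, hence the factor of 2. */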
	mgp->max_intr_slots = 2 * (cmd.data0 / sizeof(struct mcp_dma_addr));

	return 0;
}

static int myri10ge_load_firmware(struct myri10ge_priv *mgp, int adopt)
{
	char __iomem *submit;
	__be32 buf[16] __attribute__ ((__aligned__(8)));
	u32 dma_low, dma_high, size;
	int status, i;

	size = 0;
	status = myri10ge_load_hotplug_firmware(mgp, &size);
	if (status) {
		if (!adopt)
			return status;
		dev_warn(&mgp->pdev->dev, "hotplug firmware loading failed\n");

		/* Do not attempt to adopt firmware if there
		 * was a bad crc */
		if (status == -EIO)
			return status;

		status = myri10ge_adopt_running_firmware(mgp);
		if (status != 0) {
			dev_err(&mgp->pdev->dev,
				"failed to adopt running firmware\n");
			return status;
		}
		dev_info(&mgp->pdev->dev,
			 "Successfully adopted running firmware\n");
		if (mgp->tx_boundary == 4096) {
			dev_warn(&mgp->pdev->dev,
				 "Using firmware currently running on NIC"
				 ".  For optimal\n");
			dev_warn(&mgp->pdev->dev,
				 "performance consider loading optimized "
				 "firmware\n");
			dev_warn(&mgp->pdev->dev, "via hotplug\n");
		}

		set_fw_name(mgp, "adopted", false);
		mgp->tx_boundary = 2048;
		myri10ge_dummy_rdma(mgp, 1);
		status = myri10ge_get_firmware_capabilities(mgp);
		return status;
	}

	/* clear confirmation addr */
	mgp->cmd->data = 0;
	mb();

	/* send a reload command to the bootstrap MCP, and wait for the
	 * response in the confirmation address.  The firmware should
	 * write a -1 there to indicate it is alive and well
	 */
	dma_low = MYRI10GE_LOWPART_TO_U32(mgp->cmd_bus);
	dma_high = MYRI10GE_HIGHPART_TO_U32(mgp->cmd_bus);

	buf[0] = htonl(dma_high);	/* confirm addr MSW */
	buf[1] = htonl(dma_low);	/* confirm addr LSW */
	buf[2] = MYRI10GE_NO_CONFIRM_DATA;	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	 * the sram before handoff. However, the very first interfaces
	 * do not. Therefore the handoff copy must skip the first 8 bytes
	 */
	buf[3] = htonl(MYRI10GE_FW_OFFSET + 8);	/* where the code starts */
	buf[4] = htonl(size - 8);	/* length of code */
	buf[5] = htonl(8);	/* where to copy to */
	buf[6] = htonl(0);	/* where to jump to */

	submit = mgp->sram + MXGEFW_BOOT_HANDOFF;

	myri10ge_pio_copy(submit, &buf, sizeof(buf));
	mb();
	msleep(1);
	mb();
	i = 0;
	while (mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA && i < 9) {
		msleep(1 << i);
		i++;
	}
	if (mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA) {
		dev_err(&mgp->pdev->dev, "handoff failed\n");
		return -ENXIO;
	}
	myri10ge_dummy_rdma(mgp, 1);
	status = myri10ge_get_firmware_capabilities(mgp);

	return status;
}

static int myri10ge_update_mac_address(struct myri10ge_priv *mgp, u8 * addr)
{
	struct myri10ge_cmd cmd;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = myri10ge_send_cmd(mgp, MXGEFW_SET_MAC_ADDRESS, &cmd, 0);
	return status;
}

static int myri10ge_change_pause(struct myri10ge_priv *mgp, int pause)
{
	struct myri10ge_cmd cmd;
	int status, ctl;

	ctl = pause ? MXGEFW_ENABLE_FLOW_CONTROL : MXGEFW_DISABLE_FLOW_CONTROL;
	status = myri10ge_send_cmd(mgp, ctl, &cmd, 0);

	if (status) {
		netdev_err(mgp->dev, "Failed to set flow control mode\n");
		return status;
	}
	mgp->pause = pause;
	return 0;
}

static void
myri10ge_change_promisc(struct myri10ge_priv *mgp, int promisc, int atomic)
{
	struct myri10ge_cmd cmd;
	int status, ctl;

	ctl = promisc ? MXGEFW_ENABLE_PROMISC : MXGEFW_DISABLE_PROMISC;
	status = myri10ge_send_cmd(mgp, ctl, &cmd, atomic);
	if (status)
		netdev_err(mgp->dev, "Failed to set promisc mode\n");
}

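/*
 * Benchmark PCIe DMA bandwidth against a single host page using the
 * firmware's read, write, and read+write tests.  The results land in
 * read_dma/write_dma/read_write_dma in MB/s (the factor of 2 converts
 * the firmware's 0.5us ticks to microseconds).
 */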
static int myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
{
	struct myri10ge_cmd cmd;
	int status;
	u32 len;
	struct page *dmatest_page;
	dma_addr_t dmatest_bus;
	char *test = " ";

	dmatest_page = alloc_page(GFP_KERNEL);
	if (!dmatest_page)
		return -ENOMEM;
	dmatest_bus = pci_map_page(mgp->pdev, dmatest_page, 0, PAGE_SIZE,
				   DMA_BIDIRECTIONAL);

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return are the number of transfers completed.
	 * The lower 16 bits are the time in 0.5us ticks that the
	 * transfers took to complete.
	 */

	len = mgp->tx_boundary;

	cmd.data0 = MYRI10GE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MYRI10GE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = myri10ge_send_cmd(mgp, test_type, &cmd, 0);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	mgp->read_dma = ((cmd.data0 >> 16) * len * 2) / (cmd.data0 & 0xffff);
	cmd.data0 = MYRI10GE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MYRI10GE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = myri10ge_send_cmd(mgp, test_type, &cmd, 0);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	mgp->write_dma = ((cmd.data0 >> 16) * len * 2) / (cmd.data0 & 0xffff);

	cmd.data0 = MYRI10GE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MYRI10GE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = myri10ge_send_cmd(mgp, test_type, &cmd, 0);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	mgp->read_write_dma = ((cmd.data0 >> 16) * len * 2 * 2) /
	    (cmd.data0 & 0xffff);

abort:
	pci_unmap_page(mgp->pdev, dmatest_bus, PAGE_SIZE, DMA_BIDIRECTIONAL);
	put_page(dmatest_page);

	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		dev_warn(&mgp->pdev->dev, "DMA %s benchmark failed: %d\n",
			 test, status);

	return status;
}

static int myri10ge_reset(struct myri10ge_priv *mgp)
{
	struct myri10ge_cmd cmd;
	struct myri10ge_slice_state *ss;
	int i, status;
	size_t bytes;
#ifdef CONFIG_MYRI10GE_DCA
	unsigned long dca_tag_off;
#endif

	/* try to send a reset command to the card to see if it
	 * is alive */
	memset(&cmd, 0, sizeof(cmd));
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd, 0);
	if (status != 0) {
		dev_err(&mgp->pdev->dev, "failed reset\n");
		return -ENXIO;
	}

	(void)myri10ge_dma_test(mgp, MXGEFW_DMA_TEST);
	/*
	 * Use the non-ndis mcp_slot (e.g. 4 bytes total, with no
	 * toeplitz hash value returned).  Older firmware will not
	 * understand this command, but will use the correct sized
	 * mcp_slot, so we ignore error returns
	 */
	cmd.data0 = MXGEFW_RSS_MCP_SLOT_TYPE_MIN;
	(void)myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_MCP_SLOT_TYPE, &cmd, 0);

	/* Now exchange information about interrupts  */

	bytes = mgp->max_intr_slots * sizeof(*mgp->ss[0].rx_done.entry);
	cmd.data0 = (u32) bytes;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd, 0);

	/*
	 * Even though we already know how many slices are supported
	 * via myri10ge_probe_slices() MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (mgp->num_slices > 1) {

		/* ask the firmware for the maximum number of slices
		 * it supports */
		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
					   &cmd, 0);
		if (status != 0) {
			dev_err(&mgp->pdev->dev,
				"failed to get number of slices\n");
		}

		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */

		cmd.data0 = mgp->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
		if (mgp->dev->real_num_tx_queues > 1)
			cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
					   &cmd, 0);

		/* Firmware older than 1.4.32 only supports multiple
		 * RX queues, so if we get an error, first retry using a
		 * single TX queue before giving up */
		if (status != 0 && mgp->dev->real_num_tx_queues > 1) {
			mgp->dev->real_num_tx_queues = 1;
			cmd.data0 = mgp->num_slices;
			cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
			status = myri10ge_send_cmd(mgp,
						   MXGEFW_CMD_ENABLE_RSS_QUEUES,
						   &cmd, 0);
		}

		if (status != 0) {
			dev_err(&mgp->pdev->dev,
				"failed to set number of slices\n");

			return status;
		}
	}
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		cmd.data0 = MYRI10GE_LOWPART_TO_U32(ss->rx_done.bus);
		cmd.data1 = MYRI10GE_HIGHPART_TO_U32(ss->rx_done.bus);
		cmd.data2 = i;
		status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_DMA,
					    &cmd, 0);
	}

	status |=
	    myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd, 0);
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		ss->irq_claim =
		    (__iomem __be32 *) (mgp->sram + cmd.data0 + 8 * i);
	}
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				    &cmd, 0);
	mgp->irq_deassert = (__iomem __be32 *) (mgp->sram + cmd.data0);

	status |= myri10ge_send_cmd
	    (mgp, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd, 0);
	mgp->intr_coal_delay_ptr = (__iomem __be32 *) (mgp->sram + cmd.data0);
	if (status != 0) {
		dev_err(&mgp->pdev->dev,
			"failed to set interrupt parameters\n");
		return status;
	}
	put_be32(htonl(mgp->intr_coal_delay), mgp->intr_coal_delay_ptr);

#ifdef CONFIG_MYRI10GE_DCA
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_DCA_OFFSET, &cmd, 0);
	dca_tag_off = cmd.data0;
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		if (status == 0) {
			ss->dca_tag = (__iomem __be32 *)
			    (mgp->sram + dca_tag_off + 4 * i);
		} else {
			ss->dca_tag = NULL;
		}
	}
#endif				/* CONFIG_MYRI10GE_DCA */

	/* reset mcp/driver shared state back to 0 */

	mgp->link_changes = 0;
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];

		memset(ss->rx_done.entry, 0, bytes);
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_start = 0;
		ss->tx.pkt_done = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.wake_queue = 0;
		ss->tx.stop_queue = 0;
	}

	status = myri10ge_update_mac_address(mgp, mgp->dev->dev_addr);
	myri10ge_change_pause(mgp, mgp->pause);
	myri10ge_set_multicast_list(mgp->dev);
	return status;
}
#ifdef CONFIG_MYRI10GE_DCA
static void
myri10ge_write_dca(struct myri10ge_slice_state *ss, int cpu, int tag)
{
	ss->cpu = cpu;
	ss->cached_dca_tag = tag;
	put_be32(htonl(tag), ss->dca_tag);
}

static inline void myri10ge_update_dca(struct myri10ge_slice_state *ss)
{
	int cpu = get_cpu();
	int tag;

	if (cpu != ss->cpu) {
		tag = dca_get_tag(cpu);
		if (ss->cached_dca_tag != tag)
			myri10ge_write_dca(ss, cpu, tag);
	}
	put_cpu();
}

static void myri10ge_setup_dca(struct myri10ge_priv *mgp)
{
	int err, i;
	struct pci_dev *pdev = mgp->pdev;

	if (mgp->ss[0].dca_tag == NULL || mgp->dca_enabled)
		return;
	if (!myri10ge_dca) {
		dev_err(&pdev->dev, "dca disabled by administrator\n");
		return;
	}
	err = dca_add_requester(&pdev->dev);
	if (err) {
		if (err != -ENODEV)
			dev_err(&pdev->dev,
				"dca_add_requester() failed, err=%d\n", err);
		return;
	}
	mgp->dca_enabled = 1;
	for (i = 0; i < mgp->num_slices; i++)
		myri10ge_write_dca(&mgp->ss[i], -1, 0);
}

static void myri10ge_teardown_dca(struct myri10ge_priv *mgp)
{
	struct pci_dev *pdev = mgp->pdev;
	int err;

	if (!mgp->dca_enabled)
		return;
	mgp->dca_enabled = 0;
	err = dca_remove_requester(&pdev->dev);
}

static int myri10ge_notify_dca_device(struct device *dev, void *data)
{
	struct myri10ge_priv *mgp;
	unsigned long event;

	mgp = dev_get_drvdata(dev);
	event = *(unsigned long *)data;

	if (event == DCA_PROVIDER_ADD)
		myri10ge_setup_dca(mgp);
	else if (event == DCA_PROVIDER_REMOVE)
		myri10ge_teardown_dca(mgp);
	return 0;
}
#endif				/* CONFIG_MYRI10GE_DCA */

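/*
 * Post a block of 8 rx descriptors to the NIC via PIO.  The first
 * descriptor's addr_low is temporarily poisoned (all ones) while the
 * block is copied, and only rewritten with the real address at the
 * very end, so the NIC never treats a partially written block as valid.
 */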
static inline void
myri10ge_submit_8rx(struct mcp_kreq_ether_recv __iomem * dst,
		    struct mcp_kreq_ether_recv *src)
{
	__be32 low;

	low = src->addr_low;
	src->addr_low = htonl(DMA_BIT_MASK(32));
	myri10ge_pio_copy(dst, src, 4 * sizeof(*src));
	mb();
	myri10ge_pio_copy(dst + 4, src + 4, 4 * sizeof(*src));
	mb();
	src->addr_low = low;
	put_be32(low, &dst->addr_low);
	mb();
}

static inline void myri10ge_vlan_ip_csum(struct sk_buff *skb, __wsum hw_csum)
{
	struct vlan_hdr *vh = (struct vlan_hdr *)(skb->data);

	if ((skb->protocol == htons(ETH_P_8021Q)) &&
	    (vh->h_vlan_encapsulated_proto == htons(ETH_P_IP) ||
	     vh->h_vlan_encapsulated_proto == htons(ETH_P_IPV6))) {
		skb->csum = hw_csum;
		skb->ip_summed = CHECKSUM_COMPLETE;
	}
}

static inline void
myri10ge_rx_skb_build(struct sk_buff *skb, u8 * va,
		      struct skb_frag_struct *rx_frags, int len, int hlen)
{
	struct skb_frag_struct *skb_frags;

	skb->len = skb->data_len = len;
	skb->truesize = len + sizeof(struct sk_buff);
	/* attach the page(s) */

	skb_frags = skb_shinfo(skb)->frags;
	while (len > 0) {
		memcpy(skb_frags, rx_frags, sizeof(*skb_frags));
		len -= rx_frags->size;
		skb_frags++;
		rx_frags++;
		skb_shinfo(skb)->nr_frags++;
	}

	/* pskb_may_pull is not available in irq context, but
	 * skb_pull() (for ether_pad and eth_type_trans()) requires
	 * the beginning of the packet in skb_headlen(), move it
	 * manually */
	skb_copy_to_linear_data(skb, va, hlen);
	skb_shinfo(skb)->frags[0].page_offset += hlen;
	skb_shinfo(skb)->frags[0].size -= hlen;
	skb->data_len -= hlen;
	skb->tail += hlen;
	skb_pull(skb, MXGEFW_PAD);
}

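/*
 * Refill an rx ring with page fragments.  Buffers are carved out of
 * MYRI10GE_ALLOC_SIZE page allocations; a page is shared via get_page()
 * until it is used up, and descriptors are pushed to the NIC in groups
 * of 8 (see myri10ge_submit_8rx() above).
 */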
static void
myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx,
			int bytes, int watchdog)
{
	struct page *page;
	int idx;
#if MYRI10GE_ALLOC_SIZE > 4096
	int end_offset;
#endif

	if (unlikely(rx->watchdog_needed && !watchdog))
		return;

	/* try to refill entire ring */
	while (rx->fill_cnt != (rx->cnt + rx->mask + 1)) {
		idx = rx->fill_cnt & rx->mask;
		if (rx->page_offset + bytes <= MYRI10GE_ALLOC_SIZE) {
			/* we can use part of previous page */
			get_page(rx->page);
		} else {
			/* we need a new page */
			page =
			    alloc_pages(GFP_ATOMIC | __GFP_COMP,
					MYRI10GE_ALLOC_ORDER);
			if (unlikely(page == NULL)) {
				if (rx->fill_cnt - rx->cnt < 16)
					rx->watchdog_needed = 1;
				return;
			}
			rx->page = page;
			rx->page_offset = 0;
			rx->bus = pci_map_page(mgp->pdev, page, 0,
					       MYRI10GE_ALLOC_SIZE,
					       PCI_DMA_FROMDEVICE);
		}
		rx->info[idx].page = rx->page;
		rx->info[idx].page_offset = rx->page_offset;
		/* note that this is the address of the start of the
		 * page */
		dma_unmap_addr_set(&rx->info[idx], bus, rx->bus);
		rx->shadow[idx].addr_low =
		    htonl(MYRI10GE_LOWPART_TO_U32(rx->bus) + rx->page_offset);
		rx->shadow[idx].addr_high =
		    htonl(MYRI10GE_HIGHPART_TO_U32(rx->bus));

		/* start next packet on a cacheline boundary */
		rx->page_offset += SKB_DATA_ALIGN(bytes);

#if MYRI10GE_ALLOC_SIZE > 4096
		/* don't cross a 4KB boundary */
		end_offset = rx->page_offset + bytes - 1;
		if ((unsigned)(rx->page_offset ^ end_offset) > 4095)
			rx->page_offset = end_offset & ~4095;
#endif
		rx->fill_cnt++;

		/* copy 8 descriptors to the firmware at a time */
		if ((idx & 7) == 7) {
			myri10ge_submit_8rx(&rx->lanai[idx - 7],
					    &rx->shadow[idx - 7]);
		}
	}
}

static inline void
myri10ge_unmap_rx_page(struct pci_dev *pdev,
		       struct myri10ge_rx_buffer_state *info, int bytes)
{
	/* unmap the recvd page if we're the only or last user of it */
	if (bytes >= MYRI10GE_ALLOC_SIZE / 2 ||
	    (info->page_offset + 2 * bytes) > MYRI10GE_ALLOC_SIZE) {
		pci_unmap_page(pdev, (dma_unmap_addr(info, bus)
				      & ~(MYRI10GE_ALLOC_SIZE - 1)),
			       MYRI10GE_ALLOC_SIZE, PCI_DMA_FROMDEVICE);
	}
}

#define MYRI10GE_HLEN 64	/* The number of bytes to copy from a
				 * page into an skb */

static inline int
myri10ge_rx_done(struct myri10ge_slice_state *ss, struct myri10ge_rx_buf *rx,
		 int bytes, int len, __wsum csum)
{
	struct myri10ge_priv *mgp = ss->mgp;
	struct sk_buff *skb;
	struct skb_frag_struct rx_frags[MYRI10GE_MAX_FRAGS_PER_FRAME];
	int i, idx, hlen, remainder;
	struct pci_dev *pdev = mgp->pdev;
	struct net_device *dev = mgp->dev;
	u8 *va;

	len += MXGEFW_PAD;
	idx = rx->cnt & rx->mask;
	va = page_address(rx->info[idx].page) + rx->info[idx].page_offset;
	prefetch(va);
	/* Fill skb_frag_struct(s) with data from our receive */
	for (i = 0, remainder = len; remainder > 0; i++) {
		myri10ge_unmap_rx_page(pdev, &rx->info[idx], bytes);
		rx_frags[i].page = rx->info[idx].page;
		rx_frags[i].page_offset = rx->info[idx].page_offset;
		if (remainder < MYRI10GE_ALLOC_SIZE)
			rx_frags[i].size = remainder;
		else
			rx_frags[i].size = MYRI10GE_ALLOC_SIZE;
		rx->cnt++;
		idx = rx->cnt & rx->mask;
		remainder -= MYRI10GE_ALLOC_SIZE;
	}

	if (dev->features & NETIF_F_LRO) {
		rx_frags[0].page_offset += MXGEFW_PAD;
		rx_frags[0].size -= MXGEFW_PAD;
		len -= MXGEFW_PAD;
		lro_receive_frags(&ss->rx_done.lro_mgr, rx_frags,
				  /* opaque, will come back in get_frag_header */
				  len, len,
				  (void *)(__force unsigned long)csum, csum);

		return 1;
	}

	hlen = MYRI10GE_HLEN > len ? len : MYRI10GE_HLEN;

	/* allocate an skb to attach the page(s) to. This is done
	 * after trying LRO, so as to avoid skb allocation overheads */

	skb = netdev_alloc_skb(dev, MYRI10GE_HLEN + 16);
	if (unlikely(skb == NULL)) {
		ss->stats.rx_dropped++;
		do {
			i--;
			put_page(rx_frags[i].page);
		} while (i != 0);
		return 0;
	}

	/* Attach the pages to the skb, and trim off any padding */
	myri10ge_rx_skb_build(skb, va, rx_frags, len, hlen);
	if (skb_shinfo(skb)->frags[0].size <= 0) {
		put_page(skb_shinfo(skb)->frags[0].page);
		skb_shinfo(skb)->nr_frags = 0;
	}
	skb->protocol = eth_type_trans(skb, dev);
	skb_record_rx_queue(skb, ss - &mgp->ss[0]);

	if (mgp->csum_flag) {
		if ((skb->protocol == htons(ETH_P_IP)) ||
		    (skb->protocol == htons(ETH_P_IPV6))) {
			skb->csum = csum;
			skb->ip_summed = CHECKSUM_COMPLETE;
		} else
			myri10ge_vlan_ip_csum(skb, csum);
	}
	netif_receive_skb(skb);
	return 1;
}

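/*
 * Reap transmit completions up to the packet count reported by the
 * firmware: unmap each slot, free completed skbs, and wake the queue
 * if it had been flow-controlled.
 */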
static inline void
myri10ge_tx_done(struct myri10ge_slice_state *ss, int mcp_index)
{
	struct pci_dev *pdev = ss->mgp->pdev;
	struct myri10ge_tx_buf *tx = &ss->tx;
	struct netdev_queue *dev_queue;
	struct sk_buff *skb;
	int idx, len;

	while (tx->pkt_done != mcp_index) {
		idx = tx->done & tx->mask;
		skb = tx->info[idx].skb;

		/* Mark as free */
		tx->info[idx].skb = NULL;
		if (tx->info[idx].last) {
			tx->pkt_done++;
			tx->info[idx].last = 0;
		}
		tx->done++;
		len = dma_unmap_len(&tx->info[idx], len);
		dma_unmap_len_set(&tx->info[idx], len, 0);
		if (skb) {
			ss->stats.tx_bytes += skb->len;
			ss->stats.tx_packets++;
			dev_kfree_skb_irq(skb);
			if (len)
				pci_unmap_single(pdev,
						 dma_unmap_addr(&tx->info[idx],
								bus), len,
						 PCI_DMA_TODEVICE);
		} else {
			if (len)
				pci_unmap_page(pdev,
					       dma_unmap_addr(&tx->info[idx],
							      bus), len,
					       PCI_DMA_TODEVICE);
		}
	}

	dev_queue = netdev_get_tx_queue(ss->dev, ss - ss->mgp->ss);
	/*
	 * Make a minimal effort to prevent the NIC from polling an
	 * idle tx queue.  If we can't get the lock we leave the queue
	 * active. In this case, either a thread was about to start
	 * using the queue anyway, or we lost a race and the NIC will
	 * waste some of its resources polling an inactive queue for a
	 * while.
	 */

	if ((ss->mgp->dev->real_num_tx_queues > 1) &&
	    __netif_tx_trylock(dev_queue)) {
		if (tx->req == tx->done) {
			tx->queue_active = 0;
			put_be32(htonl(1), tx->send_stop);
			mb();
			mmiowb();
		}
		__netif_tx_unlock(dev_queue);
	}

	/* start the queue if we've stopped it */
	if (netif_tx_queue_stopped(dev_queue) &&
	    tx->req - tx->done < (tx->mask >> 1)) {
		tx->wake_queue++;
		netif_tx_wake_queue(dev_queue);
	}
}

static inline int
myri10ge_clean_rx_done(struct myri10ge_slice_state *ss, int budget)
{
	struct myri10ge_rx_done *rx_done = &ss->rx_done;
	struct myri10ge_priv *mgp = ss->mgp;
	struct net_device *netdev = mgp->dev;
	unsigned long rx_bytes = 0;
	unsigned long rx_packets = 0;
	unsigned long rx_ok;

	int idx = rx_done->idx;
	int cnt = rx_done->cnt;
	int work_done = 0;
	u16 length;
	__wsum checksum;

	while (rx_done->entry[idx].length != 0 && work_done < budget) {
		length = ntohs(rx_done->entry[idx].length);
		rx_done->entry[idx].length = 0;
		checksum = csum_unfold(rx_done->entry[idx].checksum);
		if (length <= mgp->small_bytes)
			rx_ok = myri10ge_rx_done(ss, &ss->rx_small,
						 mgp->small_bytes,
						 length, checksum);
		else
			rx_ok = myri10ge_rx_done(ss, &ss->rx_big,
						 mgp->big_bytes,
						 length, checksum);
		rx_packets += rx_ok;
		rx_bytes += rx_ok * (unsigned long)length;
		cnt++;
		idx = cnt & (mgp->max_intr_slots - 1);
		work_done++;
	}
	rx_done->idx = idx;
	rx_done->cnt = cnt;
	ss->stats.rx_packets += rx_packets;
	ss->stats.rx_bytes += rx_bytes;

	if (netdev->features & NETIF_F_LRO)
		lro_flush_all(&rx_done->lro_mgr);

	/* restock receive rings if needed */
	if (ss->rx_small.fill_cnt - ss->rx_small.cnt < myri10ge_fill_thresh)
		myri10ge_alloc_rx_pages(mgp, &ss->rx_small,
					mgp->small_bytes + MXGEFW_PAD, 0);
	if (ss->rx_big.fill_cnt - ss->rx_big.cnt < myri10ge_fill_thresh)
		myri10ge_alloc_rx_pages(mgp, &ss->rx_big, mgp->big_bytes, 0);

	return work_done;
}

static inline void myri10ge_check_statblock(struct myri10ge_priv *mgp)
{
	struct mcp_irq_data *stats = mgp->ss[0].fw_stats;

	if (unlikely(stats->stats_updated)) {
		unsigned link_up = ntohl(stats->link_up);
		if (mgp->link_state != link_up) {
			mgp->link_state = link_up;

			if (mgp->link_state == MXGEFW_LINK_UP) {
				if (netif_msg_link(mgp))
					netdev_info(mgp->dev, "link up\n");
				netif_carrier_on(mgp->dev);
				mgp->link_changes++;
			} else {
				if (netif_msg_link(mgp))
					netdev_info(mgp->dev, "link %s\n",
					    link_up == MXGEFW_LINK_MYRINET ?
					    "mismatch (Myrinet detected)" :
					    "down");
				netif_carrier_off(mgp->dev);
				mgp->link_changes++;
			}
		}
		if (mgp->rdma_tags_available !=
		    ntohl(stats->rdma_tags_available)) {
			mgp->rdma_tags_available =
			    ntohl(stats->rdma_tags_available);
			netdev_warn(mgp->dev, "RDMA timed out! %d tags left\n",
				    mgp->rdma_tags_available);
		}
		mgp->down_cnt += stats->link_down;
		if (stats->link_down)
			wake_up(&mgp->down_wq);
	}
}

static int myri10ge_poll(struct napi_struct *napi, int budget)
{
	struct myri10ge_slice_state *ss =
	    container_of(napi, struct myri10ge_slice_state, napi);
	int work_done;

#ifdef CONFIG_MYRI10GE_DCA
	if (ss->mgp->dca_enabled)
		myri10ge_update_dca(ss);
#endif

	/* process as many rx events as NAPI will allow */
	work_done = myri10ge_clean_rx_done(ss, budget);

	if (work_done < budget) {
		napi_complete(napi);
		put_be32(htonl(3), ss->irq_claim);
	}
	return work_done;
}

static irqreturn_t myri10ge_intr(int irq, void *arg)
{
	struct myri10ge_slice_state *ss = arg;
	struct myri10ge_priv *mgp = ss->mgp;
	struct mcp_irq_data *stats = ss->fw_stats;
	struct myri10ge_tx_buf *tx = &ss->tx;
	u32 send_done_count;
	int i;

	/* an interrupt on a non-zero receive-only slice is implicitly
	 * valid since MSI-X irqs are not shared */
	if ((mgp->dev->real_num_tx_queues == 1) && (ss != mgp->ss)) {
		napi_schedule(&ss->napi);
		return (IRQ_HANDLED);
	}

	/* make sure it is our IRQ, and that the DMA has finished */
	if (unlikely(!stats->valid))
		return (IRQ_NONE);

	/* low bit indicates receives are present, so schedule
	 * napi poll handler */
	if (stats->valid & 1)
		napi_schedule(&ss->napi);

	if (!mgp->msi_enabled && !mgp->msix_enabled) {
		put_be32(0, mgp->irq_deassert);
		if (!myri10ge_deassert_wait)
			stats->valid = 0;
		mb();
	} else
		stats->valid = 0;

	/* Wait for IRQ line to go low, if using INTx */
	i = 0;
	while (1) {
		i++;
		/* check for transmit completes and receives */
		send_done_count = ntohl(stats->send_done_count);
		if (send_done_count != tx->pkt_done)
			myri10ge_tx_done(ss, (int)send_done_count);
		if (unlikely(i > myri10ge_max_irq_loops)) {
			netdev_err(mgp->dev, "irq stuck?\n");
			stats->valid = 0;
			schedule_work(&mgp->watchdog_work);
		}
		if (likely(stats->valid == 0))
			break;
		cpu_relax();
		barrier();
	}

	/* Only slice 0 updates stats */
	if (ss == mgp->ss)
		myri10ge_check_statblock(mgp);

	put_be32(htonl(3), ss->irq_claim + 1);
	return (IRQ_HANDLED);
}

static int
myri10ge_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);
	char *ptr;
	int i;

	cmd->autoneg = AUTONEG_DISABLE;
	cmd->speed = SPEED_10000;
	cmd->duplex = DUPLEX_FULL;

	/*
	 * parse the product code to determine the interface type
	 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
	 * after the 3rd dash in the driver's cached copy of the
	 * EEPROM's product code string.
	 */
	ptr = mgp->product_code_string;
	if (ptr == NULL) {
		netdev_err(netdev, "Missing product code\n");
		return 0;
	}
	for (i = 0; i < 3; i++, ptr++) {
		ptr = strchr(ptr, '-');
		if (ptr == NULL) {
			netdev_err(netdev, "Invalid product code %s\n",
				   mgp->product_code_string);
			return 0;
		}
	}
	if (*ptr == '2')
		ptr++;
	if (*ptr == 'R' || *ptr == 'Q' || *ptr == 'S') {
		/* We've found either an XFP, quad ribbon fiber, or SFP+ */
		cmd->port = PORT_FIBRE;
		cmd->supported |= SUPPORTED_FIBRE;
		cmd->advertising |= ADVERTISED_FIBRE;
	} else {
		cmd->port = PORT_OTHER;
	}
	if (*ptr == 'R' || *ptr == 'S')
		cmd->transceiver = XCVR_EXTERNAL;
	else
		cmd->transceiver = XCVR_INTERNAL;

	return 0;
}

static void
myri10ge_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);

	strlcpy(info->driver, "myri10ge", sizeof(info->driver));
	strlcpy(info->version, MYRI10GE_VERSION_STR, sizeof(info->version));
	strlcpy(info->fw_version, mgp->fw_version, sizeof(info->fw_version));
	strlcpy(info->bus_info, pci_name(mgp->pdev), sizeof(info->bus_info));
}

static int
myri10ge_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *coal)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);

	coal->rx_coalesce_usecs = mgp->intr_coal_delay;
	return 0;
}

static int
myri10ge_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *coal)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);

	mgp->intr_coal_delay = coal->rx_coalesce_usecs;
	put_be32(htonl(mgp->intr_coal_delay), mgp->intr_coal_delay_ptr);
	return 0;
}

static void
myri10ge_get_pauseparam(struct net_device *netdev,
			struct ethtool_pauseparam *pause)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);

	pause->autoneg = 0;
	pause->rx_pause = mgp->pause;
	pause->tx_pause = mgp->pause;
}

static int
myri10ge_set_pauseparam(struct net_device *netdev,
			struct ethtool_pauseparam *pause)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);

	if (pause->tx_pause != mgp->pause)
		return myri10ge_change_pause(mgp, pause->tx_pause);
	if (pause->rx_pause != mgp->pause)
		return myri10ge_change_pause(mgp, pause->rx_pause);
	if (pause->autoneg != 0)
		return -EINVAL;
	return 0;
}

static void
myri10ge_get_ringparam(struct net_device *netdev,
		       struct ethtool_ringparam *ring)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);

	ring->rx_mini_max_pending = mgp->ss[0].rx_small.mask + 1;
	ring->rx_max_pending = mgp->ss[0].rx_big.mask + 1;
	ring->rx_jumbo_max_pending = 0;
	ring->tx_max_pending = mgp->ss[0].tx.mask + 1;
	ring->rx_mini_pending = ring->rx_mini_max_pending;
	ring->rx_pending = ring->rx_max_pending;
	ring->rx_jumbo_pending = ring->rx_jumbo_max_pending;
	ring->tx_pending = ring->tx_max_pending;
}

static u32 myri10ge_get_rx_csum(struct net_device *netdev)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);

	if (mgp->csum_flag)
		return 1;
	else
		return 0;
}

static int myri10ge_set_rx_csum(struct net_device *netdev, u32 csum_enabled)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);
	int err = 0;

	if (csum_enabled)
		mgp->csum_flag = MXGEFW_FLAGS_CKSUM;
	else {
		netdev->features &= ~NETIF_F_LRO;
		mgp->csum_flag = 0;
	}
	return err;
}

static int myri10ge_set_tso(struct net_device *netdev, u32 tso_enabled)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);
	unsigned long flags = mgp->features & (NETIF_F_TSO6 | NETIF_F_TSO);

	if (tso_enabled)
		netdev->features |= flags;
	else
		netdev->features &= ~flags;
	return 0;
}
1761
1762static const char myri10ge_gstrings_main_stats[][ETH_GSTRING_LEN] = {
1763	"rx_packets", "tx_packets", "rx_bytes", "tx_bytes", "rx_errors",
1764	"tx_errors", "rx_dropped", "tx_dropped", "multicast", "collisions",
1765	"rx_length_errors", "rx_over_errors", "rx_crc_errors",
1766	"rx_frame_errors", "rx_fifo_errors", "rx_missed_errors",
1767	"tx_aborted_errors", "tx_carrier_errors", "tx_fifo_errors",
1768	"tx_heartbeat_errors", "tx_window_errors",
1769	/* device-specific stats */
1770	"tx_boundary", "WC", "irq", "MSI", "MSIX",
1771	"read_dma_bw_MBs", "write_dma_bw_MBs", "read_write_dma_bw_MBs",
1772	"serial_number", "watchdog_resets",
1773#ifdef CONFIG_MYRI10GE_DCA
1774	"dca_capable_firmware", "dca_device_present",
1775#endif
1776	"link_changes", "link_up", "dropped_link_overflow",
1777	"dropped_link_error_or_filtered",
1778	"dropped_pause", "dropped_bad_phy", "dropped_bad_crc32",
1779	"dropped_unicast_filtered", "dropped_multicast_filtered",
1780	"dropped_runt", "dropped_overrun", "dropped_no_small_buffer",
1781	"dropped_no_big_buffer"
1782};
1783
1784static const char myri10ge_gstrings_slice_stats[][ETH_GSTRING_LEN] = {
1785	"----------- slice ---------",
1786	"tx_pkt_start", "tx_pkt_done", "tx_req", "tx_done",
1787	"rx_small_cnt", "rx_big_cnt",
1788	"wake_queue", "stop_queue", "tx_linearized",
1789	"LRO aggregated", "LRO flushed",
1790	"LRO avg aggr", "LRO no_desc"
1791};
1792
1793#define MYRI10GE_NET_STATS_LEN      21
1794#define MYRI10GE_MAIN_STATS_LEN  ARRAY_SIZE(myri10ge_gstrings_main_stats)
1795#define MYRI10GE_SLICE_STATS_LEN  ARRAY_SIZE(myri10ge_gstrings_slice_stats)
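/*
 * These lengths keep the string table and the stats array in
 * lock-step: myri10ge_get_sset_count() below reports
 * MYRI10GE_MAIN_STATS_LEN + num_slices * MYRI10GE_SLICE_STATS_LEN,
 * and myri10ge_get_strings() repeats the slice block once per slice.
 * For example, with 4 slices and no DCA that is 44 + 4 * 14 entries.
 */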
1796
1797static void
1798myri10ge_get_strings(struct net_device *netdev, u32 stringset, u8 * data)
1799{
1800	struct myri10ge_priv *mgp = netdev_priv(netdev);
1801	int i;
1802
1803	switch (stringset) {
1804	case ETH_SS_STATS:
1805		memcpy(data, *myri10ge_gstrings_main_stats,
1806		       sizeof(myri10ge_gstrings_main_stats));
1807		data += sizeof(myri10ge_gstrings_main_stats);
1808		for (i = 0; i < mgp->num_slices; i++) {
1809			memcpy(data, *myri10ge_gstrings_slice_stats,
1810			       sizeof(myri10ge_gstrings_slice_stats));
1811			data += sizeof(myri10ge_gstrings_slice_stats);
1812		}
1813		break;
1814	}
1815}
1816
1817static int myri10ge_get_sset_count(struct net_device *netdev, int sset)
1818{
1819	struct myri10ge_priv *mgp = netdev_priv(netdev);
1820
1821	switch (sset) {
1822	case ETH_SS_STATS:
1823		return MYRI10GE_MAIN_STATS_LEN +
1824		    mgp->num_slices * MYRI10GE_SLICE_STATS_LEN;
1825	default:
1826		return -EOPNOTSUPP;
1827	}
1828}
1829
1830static void
1831myri10ge_get_ethtool_stats(struct net_device *netdev,
1832			   struct ethtool_stats *stats, u64 * data)
1833{
1834	struct myri10ge_priv *mgp = netdev_priv(netdev);
1835	struct myri10ge_slice_state *ss;
1836	int slice;
1837	int i;
1838
1839	/* force stats update */
1840	(void)myri10ge_get_stats(netdev);
1841	for (i = 0; i < MYRI10GE_NET_STATS_LEN; i++)
1842		data[i] = ((unsigned long *)&netdev->stats)[i];
1843
1844	data[i++] = (unsigned int)mgp->tx_boundary;
1845	data[i++] = (unsigned int)mgp->wc_enabled;
1846	data[i++] = (unsigned int)mgp->pdev->irq;
1847	data[i++] = (unsigned int)mgp->msi_enabled;
1848	data[i++] = (unsigned int)mgp->msix_enabled;
1849	data[i++] = (unsigned int)mgp->read_dma;
1850	data[i++] = (unsigned int)mgp->write_dma;
1851	data[i++] = (unsigned int)mgp->read_write_dma;
1852	data[i++] = (unsigned int)mgp->serial_number;
1853	data[i++] = (unsigned int)mgp->watchdog_resets;
1854#ifdef CONFIG_MYRI10GE_DCA
1855	data[i++] = (unsigned int)(mgp->ss[0].dca_tag != NULL);
1856	data[i++] = (unsigned int)(mgp->dca_enabled);
1857#endif
1858	data[i++] = (unsigned int)mgp->link_changes;
1859
1860	/* firmware stats are useful only in the first slice */
1861	ss = &mgp->ss[0];
1862	data[i++] = (unsigned int)ntohl(ss->fw_stats->link_up);
1863	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_link_overflow);
1864	data[i++] =
1865	    (unsigned int)ntohl(ss->fw_stats->dropped_link_error_or_filtered);
1866	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_pause);
1867	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_bad_phy);
1868	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_bad_crc32);
1869	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_unicast_filtered);
1870	data[i++] =
1871	    (unsigned int)ntohl(ss->fw_stats->dropped_multicast_filtered);
1872	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_runt);
1873	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_overrun);
1874	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_no_small_buffer);
1875	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_no_big_buffer);
1876
1877	for (slice = 0; slice < mgp->num_slices; slice++) {
1878		ss = &mgp->ss[slice];
1879		data[i++] = slice;
1880		data[i++] = (unsigned int)ss->tx.pkt_start;
1881		data[i++] = (unsigned int)ss->tx.pkt_done;
1882		data[i++] = (unsigned int)ss->tx.req;
1883		data[i++] = (unsigned int)ss->tx.done;
1884		data[i++] = (unsigned int)ss->rx_small.cnt;
1885		data[i++] = (unsigned int)ss->rx_big.cnt;
1886		data[i++] = (unsigned int)ss->tx.wake_queue;
1887		data[i++] = (unsigned int)ss->tx.stop_queue;
1888		data[i++] = (unsigned int)ss->tx.linearized;
1889		data[i++] = ss->rx_done.lro_mgr.stats.aggregated;
1890		data[i++] = ss->rx_done.lro_mgr.stats.flushed;
1891		if (ss->rx_done.lro_mgr.stats.flushed)
1892			data[i++] = ss->rx_done.lro_mgr.stats.aggregated /
1893			    ss->rx_done.lro_mgr.stats.flushed;
1894		else
1895			data[i++] = 0;
1896		data[i++] = ss->rx_done.lro_mgr.stats.no_desc;
1897	}
1898}
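/*
 * "LRO avg aggr" above is derived on the fly as aggregated / flushed,
 * i.e. the mean number of segments folded into each large receive;
 * the flushed == 0 guard avoids a divide-by-zero before any LRO
 * traffic has been seen.
 */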
1899
1900static void myri10ge_set_msglevel(struct net_device *netdev, u32 value)
1901{
1902	struct myri10ge_priv *mgp = netdev_priv(netdev);
1903	mgp->msg_enable = value;
1904}
1905
1906static u32 myri10ge_get_msglevel(struct net_device *netdev)
1907{
1908	struct myri10ge_priv *mgp = netdev_priv(netdev);
1909	return mgp->msg_enable;
1910}
1911
1912static int myri10ge_set_flags(struct net_device *netdev, u32 value)
1913{
1914	return ethtool_op_set_flags(netdev, value, ETH_FLAG_LRO);
1915}
1916
1917static const struct ethtool_ops myri10ge_ethtool_ops = {
1918	.get_settings = myri10ge_get_settings,
1919	.get_drvinfo = myri10ge_get_drvinfo,
1920	.get_coalesce = myri10ge_get_coalesce,
1921	.set_coalesce = myri10ge_set_coalesce,
1922	.get_pauseparam = myri10ge_get_pauseparam,
1923	.set_pauseparam = myri10ge_set_pauseparam,
1924	.get_ringparam = myri10ge_get_ringparam,
1925	.get_rx_csum = myri10ge_get_rx_csum,
1926	.set_rx_csum = myri10ge_set_rx_csum,
1927	.set_tx_csum = ethtool_op_set_tx_hw_csum,
1928	.set_sg = ethtool_op_set_sg,
1929	.set_tso = myri10ge_set_tso,
1930	.get_link = ethtool_op_get_link,
1931	.get_strings = myri10ge_get_strings,
1932	.get_sset_count = myri10ge_get_sset_count,
1933	.get_ethtool_stats = myri10ge_get_ethtool_stats,
1934	.set_msglevel = myri10ge_set_msglevel,
1935	.get_msglevel = myri10ge_get_msglevel,
1936	.get_flags = ethtool_op_get_flags,
1937	.set_flags = myri10ge_set_flags
1938};
1939
1940static int myri10ge_allocate_rings(struct myri10ge_slice_state *ss)
1941{
1942	struct myri10ge_priv *mgp = ss->mgp;
1943	struct myri10ge_cmd cmd;
1944	struct net_device *dev = mgp->dev;
1945	int tx_ring_size, rx_ring_size;
1946	int tx_ring_entries, rx_ring_entries;
1947	int i, slice, status;
1948	size_t bytes;
1949
1950	/* get ring sizes */
1951	slice = ss - mgp->ss;
1952	cmd.data0 = slice;
1953	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd, 0);
1954	tx_ring_size = cmd.data0;
1955	cmd.data0 = slice;
1956	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd, 0);
1957	if (status != 0)
1958		return status;
1959	rx_ring_size = cmd.data0;
1960
1961	tx_ring_entries = tx_ring_size / sizeof(struct mcp_kreq_ether_send);
1962	rx_ring_entries = rx_ring_size / sizeof(struct mcp_dma_addr);
1963	ss->tx.mask = tx_ring_entries - 1;
1964	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
1965
1966	status = -ENOMEM;
1967
1968	/* allocate the host shadow rings */
1969
1970	bytes = 8 + (MYRI10GE_MAX_SEND_DESC_TSO + 4)
1971	    * sizeof(*ss->tx.req_list);
1972	ss->tx.req_bytes = kzalloc(bytes, GFP_KERNEL);
1973	if (ss->tx.req_bytes == NULL)
1974		goto abort_with_nothing;
1975
1976	/* ensure req_list entries are aligned to 8 bytes */
1977	ss->tx.req_list = (struct mcp_kreq_ether_send *)
1978	    ALIGN((unsigned long)ss->tx.req_bytes, 8);
1979	ss->tx.queue_active = 0;
1980
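	/*
	 * A sketch of the alignment trick above: kmalloc alignment is
	 * not relied upon, so the buffer is over-allocated by 8 bytes
	 * (plus 4 spare descriptors) and req_list is rounded up with
	 * ALIGN(addr, 8), wasting at most 7 leading bytes.
	 */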
1981	bytes = rx_ring_entries * sizeof(*ss->rx_small.shadow);
1982	ss->rx_small.shadow = kzalloc(bytes, GFP_KERNEL);
1983	if (ss->rx_small.shadow == NULL)
1984		goto abort_with_tx_req_bytes;
1985
1986	bytes = rx_ring_entries * sizeof(*ss->rx_big.shadow);
1987	ss->rx_big.shadow = kzalloc(bytes, GFP_KERNEL);
1988	if (ss->rx_big.shadow == NULL)
1989		goto abort_with_rx_small_shadow;
1990
1991	/* allocate the host info rings */
1992
1993	bytes = tx_ring_entries * sizeof(*ss->tx.info);
1994	ss->tx.info = kzalloc(bytes, GFP_KERNEL);
1995	if (ss->tx.info == NULL)
1996		goto abort_with_rx_big_shadow;
1997
1998	bytes = rx_ring_entries * sizeof(*ss->rx_small.info);
1999	ss->rx_small.info = kzalloc(bytes, GFP_KERNEL);
2000	if (ss->rx_small.info == NULL)
2001		goto abort_with_tx_info;
2002
2003	bytes = rx_ring_entries * sizeof(*ss->rx_big.info);
2004	ss->rx_big.info = kzalloc(bytes, GFP_KERNEL);
2005	if (ss->rx_big.info == NULL)
2006		goto abort_with_rx_small_info;
2007
2008	/* Fill the receive rings */
2009	ss->rx_big.cnt = 0;
2010	ss->rx_small.cnt = 0;
2011	ss->rx_big.fill_cnt = 0;
2012	ss->rx_small.fill_cnt = 0;
2013	ss->rx_small.page_offset = MYRI10GE_ALLOC_SIZE;
2014	ss->rx_big.page_offset = MYRI10GE_ALLOC_SIZE;
2015	ss->rx_small.watchdog_needed = 0;
2016	ss->rx_big.watchdog_needed = 0;
2017	myri10ge_alloc_rx_pages(mgp, &ss->rx_small,
2018				mgp->small_bytes + MXGEFW_PAD, 0);
2019
2020	if (ss->rx_small.fill_cnt < ss->rx_small.mask + 1) {
2021		netdev_err(dev, "slice-%d: alloced only %d small bufs\n",
2022			   slice, ss->rx_small.fill_cnt);
2023		goto abort_with_rx_small_ring;
2024	}
2025
2026	myri10ge_alloc_rx_pages(mgp, &ss->rx_big, mgp->big_bytes, 0);
2027	if (ss->rx_big.fill_cnt < ss->rx_big.mask + 1) {
2028		netdev_err(dev, "slice-%d: alloced only %d big bufs\n",
2029			   slice, ss->rx_big.fill_cnt);
2030		goto abort_with_rx_big_ring;
2031	}
2032
2033	return 0;
2034
2035abort_with_rx_big_ring:
2036	for (i = ss->rx_big.cnt; i < ss->rx_big.fill_cnt; i++) {
2037		int idx = i & ss->rx_big.mask;
2038		myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_big.info[idx],
2039				       mgp->big_bytes);
2040		put_page(ss->rx_big.info[idx].page);
2041	}
2042
2043abort_with_rx_small_ring:
2044	for (i = ss->rx_small.cnt; i < ss->rx_small.fill_cnt; i++) {
2045		int idx = i & ss->rx_small.mask;
2046		myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_small.info[idx],
2047				       mgp->small_bytes + MXGEFW_PAD);
2048		put_page(ss->rx_small.info[idx].page);
2049	}
2050
2051	kfree(ss->rx_big.info);
2052
2053abort_with_rx_small_info:
2054	kfree(ss->rx_small.info);
2055
2056abort_with_tx_info:
2057	kfree(ss->tx.info);
2058
2059abort_with_rx_big_shadow:
2060	kfree(ss->rx_big.shadow);
2061
2062abort_with_rx_small_shadow:
2063	kfree(ss->rx_small.shadow);
2064
2065abort_with_tx_req_bytes:
2066	kfree(ss->tx.req_bytes);
2067	ss->tx.req_bytes = NULL;
2068	ss->tx.req_list = NULL;
2069
2070abort_with_nothing:
2071	return status;
2072}
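/*
 * The abort_with_* ladder above is the usual kernel unwind idiom:
 * each label frees exactly what was allocated before the failing
 * step, in reverse order, so the function can fail at any point
 * without leaking rx pages or the shadow/info arrays.
 */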
2073
2074static void myri10ge_free_rings(struct myri10ge_slice_state *ss)
2075{
2076	struct myri10ge_priv *mgp = ss->mgp;
2077	struct sk_buff *skb;
2078	struct myri10ge_tx_buf *tx;
2079	int i, len, idx;
2080
2081	/* If not allocated, skip it */
2082	if (ss->tx.req_list == NULL)
2083		return;
2084
2085	for (i = ss->rx_big.cnt; i < ss->rx_big.fill_cnt; i++) {
2086		idx = i & ss->rx_big.mask;
2087		if (i == ss->rx_big.fill_cnt - 1)
2088			ss->rx_big.info[idx].page_offset = MYRI10GE_ALLOC_SIZE;
2089		myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_big.info[idx],
2090				       mgp->big_bytes);
2091		put_page(ss->rx_big.info[idx].page);
2092	}
2093
2094	for (i = ss->rx_small.cnt; i < ss->rx_small.fill_cnt; i++) {
2095		idx = i & ss->rx_small.mask;
2096		if (i == ss->rx_small.fill_cnt - 1)
2097			ss->rx_small.info[idx].page_offset =
2098			    MYRI10GE_ALLOC_SIZE;
2099		myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_small.info[idx],
2100				       mgp->small_bytes + MXGEFW_PAD);
2101		put_page(ss->rx_small.info[idx].page);
2102	}
2103	tx = &ss->tx;
2104	while (tx->done != tx->req) {
2105		idx = tx->done & tx->mask;
2106		skb = tx->info[idx].skb;
2107
2108		/* Mark as free */
2109		tx->info[idx].skb = NULL;
2110		tx->done++;
2111		len = dma_unmap_len(&tx->info[idx], len);
2112		dma_unmap_len_set(&tx->info[idx], len, 0);
2113		if (skb) {
2114			ss->stats.tx_dropped++;
2115			dev_kfree_skb_any(skb);
2116			if (len)
2117				pci_unmap_single(mgp->pdev,
2118						 dma_unmap_addr(&tx->info[idx],
2119								bus), len,
2120						 PCI_DMA_TODEVICE);
2121		} else {
2122			if (len)
2123				pci_unmap_page(mgp->pdev,
2124					       dma_unmap_addr(&tx->info[idx],
2125							      bus), len,
2126					       PCI_DMA_TODEVICE);
2127		}
2128	}
2129	kfree(ss->rx_big.info);
2130
2131	kfree(ss->rx_small.info);
2132
2133	kfree(ss->tx.info);
2134
2135	kfree(ss->rx_big.shadow);
2136
2137	kfree(ss->rx_small.shadow);
2138
2139	kfree(ss->tx.req_bytes);
2140	ss->tx.req_bytes = NULL;
2141	ss->tx.req_list = NULL;
2142}
2143
2144static int myri10ge_request_irq(struct myri10ge_priv *mgp)
2145{
2146	struct pci_dev *pdev = mgp->pdev;
2147	struct myri10ge_slice_state *ss;
2148	struct net_device *netdev = mgp->dev;
2149	int i;
2150	int status;
2151
2152	mgp->msi_enabled = 0;
2153	mgp->msix_enabled = 0;
2154	status = 0;
2155	if (myri10ge_msi) {
2156		if (mgp->num_slices > 1) {
2157			status =
2158			    pci_enable_msix(pdev, mgp->msix_vectors,
2159					    mgp->num_slices);
2160			if (status == 0) {
2161				mgp->msix_enabled = 1;
2162			} else {
2163				dev_err(&pdev->dev,
2164					"Error %d setting up MSI-X\n", status);
2165				return status;
2166			}
2167		}
2168		if (mgp->msix_enabled == 0) {
2169			status = pci_enable_msi(pdev);
2170			if (status != 0) {
2171				dev_err(&pdev->dev,
2172					"Error %d setting up MSI; falling back to xPIC\n",
2173					status);
2174			} else {
2175				mgp->msi_enabled = 1;
2176			}
2177		}
2178	}
2179	if (mgp->msix_enabled) {
2180		for (i = 0; i < mgp->num_slices; i++) {
2181			ss = &mgp->ss[i];
2182			snprintf(ss->irq_desc, sizeof(ss->irq_desc),
2183				 "%s:slice-%d", netdev->name, i);
2184			status = request_irq(mgp->msix_vectors[i].vector,
2185					     myri10ge_intr, 0, ss->irq_desc,
2186					     ss);
2187			if (status != 0) {
2188				dev_err(&pdev->dev,
2189					"slice %d failed to allocate IRQ\n", i);
2190				i--;
2191				while (i >= 0) {
2192					free_irq(mgp->msix_vectors[i].vector,
2193						 &mgp->ss[i]);
2194					i--;
2195				}
2196				pci_disable_msix(pdev);
2197				return status;
2198			}
2199		}
2200	} else {
2201		status = request_irq(pdev->irq, myri10ge_intr, IRQF_SHARED,
2202				     mgp->dev->name, &mgp->ss[0]);
2203		if (status != 0) {
2204			dev_err(&pdev->dev, "failed to allocate IRQ\n");
2205			if (mgp->msi_enabled)
2206				pci_disable_msi(pdev);
2207		}
2208	}
2209	return status;
2210}
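/*
 * Interrupt setup above degrades gracefully: MSI-X with one vector
 * per slice when several slices are configured, otherwise plain MSI,
 * and finally a shared legacy INTx ("xPIC") line if pci_enable_msi()
 * fails.  Only the MSI-X path gets per-slice irq_desc names such as
 * "eth0:slice-2"; the fallbacks funnel everything through slice 0.
 */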
2211
2212static void myri10ge_free_irq(struct myri10ge_priv *mgp)
2213{
2214	struct pci_dev *pdev = mgp->pdev;
2215	int i;
2216
2217	if (mgp->msix_enabled) {
2218		for (i = 0; i < mgp->num_slices; i++)
2219			free_irq(mgp->msix_vectors[i].vector, &mgp->ss[i]);
2220	} else {
2221		free_irq(pdev->irq, &mgp->ss[0]);
2222	}
2223	if (mgp->msi_enabled)
2224		pci_disable_msi(pdev);
2225	if (mgp->msix_enabled)
2226		pci_disable_msix(pdev);
2227}
2228
2229static int
2230myri10ge_get_frag_header(struct skb_frag_struct *frag, void **mac_hdr,
2231			 void **ip_hdr, void **tcpudp_hdr,
2232			 u64 * hdr_flags, void *priv)
2233{
2234	struct ethhdr *eh;
2235	struct vlan_ethhdr *veh;
2236	struct iphdr *iph;
2237	u8 *va = page_address(frag->page) + frag->page_offset;
2238	unsigned long ll_hlen;
2239	/* passed opaque through lro_receive_frags() */
2240	__wsum csum = (__force __wsum) (unsigned long)priv;
2241
2242	/* find the mac header, aborting if not IPv4 */
2243
2244	eh = (struct ethhdr *)va;
2245	*mac_hdr = eh;
2246	ll_hlen = ETH_HLEN;
2247	if (eh->h_proto != htons(ETH_P_IP)) {
2248		if (eh->h_proto == htons(ETH_P_8021Q)) {
2249			veh = (struct vlan_ethhdr *)va;
2250			if (veh->h_vlan_encapsulated_proto != htons(ETH_P_IP))
2251				return -1;
2252
2253			ll_hlen += VLAN_HLEN;
2254
2255			/*
2256			 *  HW checksum starts ETH_HLEN bytes into
2257			 *  frame, so we must subtract off the VLAN
2258			 *  header's checksum before csum can be used
2259			 */
2260			csum = csum_sub(csum, csum_partial(va + ETH_HLEN,
2261							   VLAN_HLEN, 0));
2262		} else {
2263			return -1;
2264		}
2265	}
2266	*hdr_flags = LRO_IPV4;
2267
2268	iph = (struct iphdr *)(va + ll_hlen);
2269	*ip_hdr = iph;
2270	if (iph->protocol != IPPROTO_TCP)
2271		return -1;
2272	if (iph->frag_off & htons(IP_MF | IP_OFFSET))
2273		return -1;
2274	*hdr_flags |= LRO_TCP;
2275	*tcpudp_hdr = (u8 *) (*ip_hdr) + (iph->ihl << 2);
2276
2277	/* verify the IP checksum */
2278	if (unlikely(ip_fast_csum((u8 *) iph, iph->ihl)))
2279		return -1;
2280
2281	/* verify the TCP checksum */
2282	if (unlikely(csum_tcpudp_magic(iph->saddr, iph->daddr,
2283				       ntohs(iph->tot_len) - (iph->ihl << 2),
2284				       IPPROTO_TCP, csum)))
2285		return -1;
2286
2287	return 0;
2288}
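/*
 * Worked example for the VLAN adjustment in the frag-header parser
 * above: the NIC's receive checksum covers everything starting
 * ETH_HLEN (14) bytes into the frame, but on a tagged frame the IP
 * header starts at ETH_HLEN + VLAN_HLEN (18), so the 4 tag bytes the
 * hardware summed must be backed out:
 *
 *	csum = csum_sub(csum, csum_partial(va + ETH_HLEN, VLAN_HLEN, 0));
 *
 * after which csum_tcpudp_magic() can validate the TCP checksum as if
 * the tag were not present.
 */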
2289
2290static int myri10ge_get_txrx(struct myri10ge_priv *mgp, int slice)
2291{
2292	struct myri10ge_cmd cmd;
2293	struct myri10ge_slice_state *ss;
2294	int status;
2295
2296	ss = &mgp->ss[slice];
2297	status = 0;
2298	if (slice == 0 || (mgp->dev->real_num_tx_queues > 1)) {
2299		cmd.data0 = slice;
2300		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET,
2301					   &cmd, 0);
2302		ss->tx.lanai = (struct mcp_kreq_ether_send __iomem *)
2303		    (mgp->sram + cmd.data0);
2304	}
2305	cmd.data0 = slice;
2306	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET,
2307				    &cmd, 0);
2308	ss->rx_small.lanai = (struct mcp_kreq_ether_recv __iomem *)
2309	    (mgp->sram + cmd.data0);
2310
2311	cmd.data0 = slice;
2312	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd, 0);
2313	ss->rx_big.lanai = (struct mcp_kreq_ether_recv __iomem *)
2314	    (mgp->sram + cmd.data0);
2315
2316	ss->tx.send_go = (__iomem __be32 *)
2317	    (mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
2318	ss->tx.send_stop = (__iomem __be32 *)
2319	    (mgp->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
2320	return status;
2321
2322}
2323
2324static int myri10ge_set_stats(struct myri10ge_priv *mgp, int slice)
2325{
2326	struct myri10ge_cmd cmd;
2327	struct myri10ge_slice_state *ss;
2328	int status;
2329
2330	ss = &mgp->ss[slice];
2331	cmd.data0 = MYRI10GE_LOWPART_TO_U32(ss->fw_stats_bus);
2332	cmd.data1 = MYRI10GE_HIGHPART_TO_U32(ss->fw_stats_bus);
2333	cmd.data2 = sizeof(struct mcp_irq_data) | (slice << 16);
2334	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd, 0);
2335	if (status == -ENOSYS) {
2336		dma_addr_t bus = ss->fw_stats_bus;
2337		if (slice != 0)
2338			return -EINVAL;
2339		bus += offsetof(struct mcp_irq_data, send_done_count);
2340		cmd.data0 = MYRI10GE_LOWPART_TO_U32(bus);
2341		cmd.data1 = MYRI10GE_HIGHPART_TO_U32(bus);
2342		status = myri10ge_send_cmd(mgp,
2343					   MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
2344					   &cmd, 0);
2345		/* Firmware cannot support multicast without STATS_DMA_V2 */
2346		mgp->fw_multicast_support = 0;
2347	} else {
2348		mgp->fw_multicast_support = 1;
2349	}
2350	return 0;
2351}
2352
2353static int myri10ge_open(struct net_device *dev)
2354{
2355	struct myri10ge_slice_state *ss;
2356	struct myri10ge_priv *mgp = netdev_priv(dev);
2357	struct myri10ge_cmd cmd;
2358	int i, status, big_pow2, slice;
2359	u8 *itable;
2360	struct net_lro_mgr *lro_mgr;
2361
2362	if (mgp->running != MYRI10GE_ETH_STOPPED)
2363		return -EBUSY;
2364
2365	mgp->running = MYRI10GE_ETH_STARTING;
2366	status = myri10ge_reset(mgp);
2367	if (status != 0) {
2368		netdev_err(dev, "failed reset\n");
2369		goto abort_with_nothing;
2370	}
2371
2372	if (mgp->num_slices > 1) {
2373		cmd.data0 = mgp->num_slices;
2374		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
2375		if (mgp->dev->real_num_tx_queues > 1)
2376			cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
2377		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
2378					   &cmd, 0);
2379		if (status != 0) {
2380			netdev_err(dev, "failed to set number of slices\n");
2381			goto abort_with_nothing;
2382		}
2383		/* setup the indirection table */
2384		cmd.data0 = mgp->num_slices;
2385		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
2386					   &cmd, 0);
2387
2388		status |= myri10ge_send_cmd(mgp,
2389					    MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
2390					    &cmd, 0);
2391		if (status != 0) {
2392			netdev_err(dev, "failed to setup rss tables\n");
2393			goto abort_with_nothing;
2394		}
2395
2396		/* just enable an identity mapping */
2397		itable = mgp->sram + cmd.data0;
2398		for (i = 0; i < mgp->num_slices; i++)
2399			__raw_writeb(i, &itable[i]);
2400
2401		cmd.data0 = 1;
2402		cmd.data1 = myri10ge_rss_hash;
2403		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_ENABLE,
2404					   &cmd, 0);
2405		if (status != 0) {
2406			netdev_err(dev, "failed to enable slices\n");
2407			goto abort_with_nothing;
2408		}
2409	}
2410
2411	status = myri10ge_request_irq(mgp);
2412	if (status != 0)
2413		goto abort_with_nothing;
2414
2415	/* decide what small buffer size to use.  For good TCP rx
2416	 * performance, it is important to not receive 1514 byte
2417	 * frames into jumbo buffers, as it confuses the socket buffer
2418	 * accounting code, leading to drops and erratic performance.
2419	 */
2420
2421	if (dev->mtu <= ETH_DATA_LEN)
2422		/* enough for a TCP header */
2423		mgp->small_bytes = (128 > SMP_CACHE_BYTES)
2424		    ? (128 - MXGEFW_PAD)
2425		    : (SMP_CACHE_BYTES - MXGEFW_PAD);
2426	else
2427		/* enough for a vlan encapsulated ETH_DATA_LEN frame */
2428		mgp->small_bytes = VLAN_ETH_FRAME_LEN;
2429
2430	/* Override the small buffer size? */
2431	if (myri10ge_small_bytes > 0)
2432		mgp->small_bytes = myri10ge_small_bytes;
2433
2434	/* The firmware needs the big buffer size as a power of 2.  Lie and
2435	 * tell it the buffer is larger, because we only use 1
2436	 * buffer/pkt, and the MTU will prevent overruns.
2437	 */
2438	big_pow2 = dev->mtu + ETH_HLEN + VLAN_HLEN + MXGEFW_PAD;
2439	if (big_pow2 < MYRI10GE_ALLOC_SIZE / 2) {
2440		while (!is_power_of_2(big_pow2))
2441			big_pow2++;
2442		mgp->big_bytes = dev->mtu + ETH_HLEN + VLAN_HLEN + MXGEFW_PAD;
2443	} else {
2444		big_pow2 = MYRI10GE_ALLOC_SIZE;
2445		mgp->big_bytes = big_pow2;
2446	}
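	/*
	 * Example of the rounding above, assuming a standard 1500-byte
	 * MTU: 1500 + ETH_HLEN (14) + VLAN_HLEN (4) + MXGEFW_PAD (2) =
	 * 1520, so the firmware is told big_pow2 = 2048 while
	 * big_bytes stays 1520; one buffer per packet plus the MTU
	 * check keeps the overstated size safe.
	 */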
2447
2448	/* setup the per-slice data structures */
2449	for (slice = 0; slice < mgp->num_slices; slice++) {
2450		ss = &mgp->ss[slice];
2451
2452		status = myri10ge_get_txrx(mgp, slice);
2453		if (status != 0) {
2454			netdev_err(dev, "failed to get ring sizes or locations\n");
2455			goto abort_with_rings;
2456		}
2457		status = myri10ge_allocate_rings(ss);
2458		if (status != 0)
2459			goto abort_with_rings;
2460
2461		/* only firmware which supports multiple TX queues
2462		 * supports setting up the tx stats on non-zero
2463		 * slices */
2464		if (slice == 0 || mgp->dev->real_num_tx_queues > 1)
2465			status = myri10ge_set_stats(mgp, slice);
2466		if (status) {
2467			netdev_err(dev, "Couldn't set stats DMA\n");
2468			goto abort_with_rings;
2469		}
2470
2471		lro_mgr = &ss->rx_done.lro_mgr;
2472		lro_mgr->dev = dev;
2473		lro_mgr->features = LRO_F_NAPI;
2474		lro_mgr->ip_summed = CHECKSUM_COMPLETE;
2475		lro_mgr->ip_summed_aggr = CHECKSUM_UNNECESSARY;
2476		lro_mgr->max_desc = MYRI10GE_MAX_LRO_DESCRIPTORS;
2477		lro_mgr->lro_arr = ss->rx_done.lro_desc;
2478		lro_mgr->get_frag_header = myri10ge_get_frag_header;
2479		lro_mgr->max_aggr = myri10ge_lro_max_pkts;
2480		lro_mgr->frag_align_pad = 2;
2481		if (lro_mgr->max_aggr > MAX_SKB_FRAGS)
2482			lro_mgr->max_aggr = MAX_SKB_FRAGS;
2483
2484		/* must happen prior to any irq */
2485		napi_enable(&(ss)->napi);
2486	}
2487
2488	/* now give firmware buffers sizes, and MTU */
2489	cmd.data0 = dev->mtu + ETH_HLEN + VLAN_HLEN;
2490	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_MTU, &cmd, 0);
2491	cmd.data0 = mgp->small_bytes;
2492	status |=
2493	    myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd, 0);
2494	cmd.data0 = big_pow2;
2495	status |=
2496	    myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd, 0);
2497	if (status) {
2498		netdev_err(dev, "Couldn't set buffer sizes\n");
2499		goto abort_with_rings;
2500	}
2501
2502	/*
2503	 * Set Linux style TSO mode; this is needed only on newer
2504	 * firmware versions.  Older versions default to Linux
2505	 * style TSO.
2506	 */
2507	cmd.data0 = 0;
2508	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_TSO_MODE, &cmd, 0);
2509	if (status && status != -ENOSYS) {
2510		netdev_err(dev, "Couldn't set TSO mode\n");
2511		goto abort_with_rings;
2512	}
2513
2514	mgp->link_state = ~0U;
2515	mgp->rdma_tags_available = 15;
2516
2517	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_UP, &cmd, 0);
2518	if (status) {
2519		netdev_err(dev, "Couldn't bring up link\n");
2520		goto abort_with_rings;
2521	}
2522
2523	mgp->running = MYRI10GE_ETH_RUNNING;
2524	mgp->watchdog_timer.expires = jiffies + myri10ge_watchdog_timeout * HZ;
2525	add_timer(&mgp->watchdog_timer);
2526	netif_tx_wake_all_queues(dev);
2527
2528	return 0;
2529
2530abort_with_rings:
2531	while (slice) {
2532		slice--;
2533		napi_disable(&mgp->ss[slice].napi);
2534	}
2535	for (i = 0; i < mgp->num_slices; i++)
2536		myri10ge_free_rings(&mgp->ss[i]);
2537
2538	myri10ge_free_irq(mgp);
2539
2540abort_with_nothing:
2541	mgp->running = MYRI10GE_ETH_STOPPED;
2542	return -ENOMEM;
2543}
2544
2545static int myri10ge_close(struct net_device *dev)
2546{
2547	struct myri10ge_priv *mgp = netdev_priv(dev);
2548	struct myri10ge_cmd cmd;
2549	int status, old_down_cnt;
2550	int i;
2551
2552	if (mgp->running != MYRI10GE_ETH_RUNNING)
2553		return 0;
2554
2555	if (mgp->ss[0].tx.req_bytes == NULL)
2556		return 0;
2557
2558	del_timer_sync(&mgp->watchdog_timer);
2559	mgp->running = MYRI10GE_ETH_STOPPING;
2560	for (i = 0; i < mgp->num_slices; i++) {
2561		napi_disable(&mgp->ss[i].napi);
2562	}
2563	netif_carrier_off(dev);
2564
2565	netif_tx_stop_all_queues(dev);
2566	if (mgp->rebooted == 0) {
2567		old_down_cnt = mgp->down_cnt;
2568		mb();
2569		status =
2570		    myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd, 0);
2571		if (status)
2572			netdev_err(dev, "Couldn't bring down link\n");
2573
2574		wait_event_timeout(mgp->down_wq, old_down_cnt != mgp->down_cnt,
2575				   HZ);
2576		if (old_down_cnt == mgp->down_cnt)
2577			netdev_err(dev, "never got down irq\n");
2578	}
2579	netif_tx_disable(dev);
2580	myri10ge_free_irq(mgp);
2581	for (i = 0; i < mgp->num_slices; i++)
2582		myri10ge_free_rings(&mgp->ss[i]);
2583
2584	mgp->running = MYRI10GE_ETH_STOPPED;
2585	return 0;
2586}
2587
2588/* copy an array of struct mcp_kreq_ether_send's to the mcp.  Copy
2589 * backwards one at a time and handle ring wraps */
2590
2591static inline void
2592myri10ge_submit_req_backwards(struct myri10ge_tx_buf *tx,
2593			      struct mcp_kreq_ether_send *src, int cnt)
2594{
2595	int idx, starting_slot;
2596	starting_slot = tx->req;
2597	while (cnt > 1) {
2598		cnt--;
2599		idx = (starting_slot + cnt) & tx->mask;
2600		myri10ge_pio_copy(&tx->lanai[idx], &src[cnt], sizeof(*src));
2601		mb();
2602	}
2603}
2604
2605/*
2606 * copy an array of struct mcp_kreq_ether_send's to the mcp.  Copy
2607 * at most 32 bytes at a time, so as to avoid involving the software
2608 * pio handler in the nic.   We re-write the first segment's flags
2609 * to mark them valid only after writing the entire chain.
2610 */
2611
2612static inline void
2613myri10ge_submit_req(struct myri10ge_tx_buf *tx, struct mcp_kreq_ether_send *src,
2614		    int cnt)
2615{
2616	int idx, i;
2617	struct mcp_kreq_ether_send __iomem *dstp, *dst;
2618	struct mcp_kreq_ether_send *srcp;
2619	u8 last_flags;
2620
2621	idx = tx->req & tx->mask;
2622
2623	last_flags = src->flags;
2624	src->flags = 0;
2625	mb();
2626	dst = dstp = &tx->lanai[idx];
2627	srcp = src;
2628
2629	if ((idx + cnt) < tx->mask) {
2630		for (i = 0; i < (cnt - 1); i += 2) {
2631			myri10ge_pio_copy(dstp, srcp, 2 * sizeof(*src));
2632			mb();	/* force write every 32 bytes */
2633			srcp += 2;
2634			dstp += 2;
2635		}
2636	} else {
2637		/* submit all but the first request, and ensure
2638		 * that it is submitted below */
2639		myri10ge_submit_req_backwards(tx, src, cnt);
2640		i = 0;
2641	}
2642	if (i < cnt) {
2643		/* submit the first request */
2644		myri10ge_pio_copy(dstp, srcp, sizeof(*src));
2645		mb();		/* barrier before setting valid flag */
2646	}
2647
2648	/* re-write the last 32-bits with the valid flags */
2649	src->flags = last_flags;
2650	put_be32(*((__be32 *) src + 3), (__be32 __iomem *) dst + 3);
2651	tx->req += cnt;
2652	mb();
2653}
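/*
 * The flag dance above is what makes PIO submission safe: the first
 * descriptor is copied with flags == 0 (invalid), the rest of the
 * chain is streamed in 32-byte chunks (or written backwards across a
 * ring wrap), and only then is the first descriptor's last 32-bit
 * word rewritten with the real flags.  Since the NIC keys off that
 * word, it can never observe a partially written chain.
 */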
2654
2655/*
2656 * Transmit a packet.  We need to split the packet so that a single
2657 * segment does not cross myri10ge->tx_boundary, so this makes segment
2658 * counting tricky.  So rather than try to count segments up front, we
2659 * just give up if there are too few segments to hold a reasonably
2660 * fragmented packet currently available.  If we run
2661 * out of segments while preparing a packet for DMA, we just linearize
2662 * it and try again.
2663 */
2664
2665static netdev_tx_t myri10ge_xmit(struct sk_buff *skb,
2666				       struct net_device *dev)
2667{
2668	struct myri10ge_priv *mgp = netdev_priv(dev);
2669	struct myri10ge_slice_state *ss;
2670	struct mcp_kreq_ether_send *req;
2671	struct myri10ge_tx_buf *tx;
2672	struct skb_frag_struct *frag;
2673	struct netdev_queue *netdev_queue;
2674	dma_addr_t bus;
2675	u32 low;
2676	__be32 high_swapped;
2677	unsigned int len;
2678	int idx, last_idx, avail, frag_cnt, frag_idx, count, mss, max_segments;
2679	u16 pseudo_hdr_offset, cksum_offset, queue;
2680	int cum_len, seglen, boundary, rdma_count;
2681	u8 flags, odd_flag;
2682
2683	queue = skb_get_queue_mapping(skb);
2684	ss = &mgp->ss[queue];
2685	netdev_queue = netdev_get_tx_queue(mgp->dev, queue);
2686	tx = &ss->tx;
2687
2688again:
2689	req = tx->req_list;
2690	avail = tx->mask - 1 - (tx->req - tx->done);
2691
2692	mss = 0;
2693	max_segments = MXGEFW_MAX_SEND_DESC;
2694
2695	if (skb_is_gso(skb)) {
2696		mss = skb_shinfo(skb)->gso_size;
2697		max_segments = MYRI10GE_MAX_SEND_DESC_TSO;
2698	}
2699
2700	if (unlikely(avail < max_segments)) {
2701		/* we are out of transmit resources */
2702		tx->stop_queue++;
2703		netif_tx_stop_queue(netdev_queue);
2704		return NETDEV_TX_BUSY;
2705	}
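	/*
	 * Sketch of the availability math: tx->req and tx->done are
	 * free-running, so (tx->req - tx->done) is the in-flight count
	 * and avail = mask - 1 - inflight leaves one slot of headroom.
	 * A TSO frame may need up to MYRI10GE_MAX_SEND_DESC_TSO slots,
	 * so the queue is stopped here rather than risk running out of
	 * descriptors halfway through a packet.
	 */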
2706
2707	/* Setup checksum offloading, if needed */
2708	cksum_offset = 0;
2709	pseudo_hdr_offset = 0;
2710	odd_flag = 0;
2711	flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST);
2712	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
2713		cksum_offset = skb_transport_offset(skb);
2714		pseudo_hdr_offset = cksum_offset + skb->csum_offset;
2715		/* If the headers are excessively large, then we must
2716		 * fall back to a software checksum */
2717		if (unlikely(!mss && (cksum_offset > 255 ||
2718				      pseudo_hdr_offset > 127))) {
2719			if (skb_checksum_help(skb))
2720				goto drop;
2721			cksum_offset = 0;
2722			pseudo_hdr_offset = 0;
2723		} else {
2724			odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2725			flags |= MXGEFW_FLAGS_CKSUM;
2726		}
2727	}
2728
2729	cum_len = 0;
2730
2731	if (mss) {		/* TSO */
2732		/* this removes any CKSUM flag from before */
2733		flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST);
2734
2735		/* negative cum_len signifies to the
2736		 * send loop that we are still in the
2737		 * header portion of the TSO packet.
2738		 * TSO header can be at most 1KB long */
2739		cum_len = -(skb_transport_offset(skb) + tcp_hdrlen(skb));
2740
2741		/* for IPv6 TSO, the checksum offset stores the
2742		 * TCP header length, to save the firmware from
2743		 * the need to parse the headers */
2744		if (skb_is_gso_v6(skb)) {
2745			cksum_offset = tcp_hdrlen(skb);
2746			/* Can only handle headers <= max_tso6 long */
2747			if (unlikely(-cum_len > mgp->max_tso6))
2748				return myri10ge_sw_tso(skb, dev);
2749		}
2750		/* for TSO, pseudo_hdr_offset holds mss.
2751		 * The firmware figures out where to put
2752		 * the checksum by parsing the header. */
2753		pseudo_hdr_offset = mss;
2754	} else if (skb->len <= MXGEFW_SEND_SMALL_SIZE) {
2755		/* Mark small packets, and pad out tiny
2756		 * packets */
2757		flags |= MXGEFW_FLAGS_SMALL;
2758
2759		/* pad frames to at least ETH_ZLEN bytes */
2760		if (unlikely(skb->len < ETH_ZLEN)) {
2761			if (skb_padto(skb, ETH_ZLEN)) {
2762				/* The packet is gone, so we must
2763				 * return 0 */
2764				ss->stats.tx_dropped += 1;
2765				return NETDEV_TX_OK;
2766			}
2767			/* adjust the len to account for the zero pad
2768			 * so that the nic can know how long it is */
2769			skb->len = ETH_ZLEN;
2770		}
2771	}
2772
2773	/* map the skb for DMA */
2774	len = skb_headlen(skb);
2775	idx = tx->req & tx->mask;
2776	tx->info[idx].skb = skb;
2777	bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE);
2778	dma_unmap_addr_set(&tx->info[idx], bus, bus);
2779	dma_unmap_len_set(&tx->info[idx], len, len);
2780
2781	frag_cnt = skb_shinfo(skb)->nr_frags;
2782	frag_idx = 0;
2783	count = 0;
2784	rdma_count = 0;
2785
2786	/* "rdma_count" is the number of RDMAs belonging to the
2787	 * current packet BEFORE the current send request. For
2788	 * non-TSO packets, this is equal to "count".
2789	 * For TSO packets, rdma_count needs to be reset
2790	 * to 0 after a segment cut.
2791	 *
2792	 * The rdma_count field of the send request is
2793	 * the number of RDMAs of the packet starting at
2794	 * that request. For TSO send requests with one or more cuts
2795	 * in the middle, this is the number of RDMAs starting
2796	 * after the last cut in the request. All previous
2797	 * segments before the last cut implicitly have 1 RDMA.
2798	 *
2799	 * Since the number of RDMAs is not known beforehand,
2800	 * it must be filled-in retroactively - after each
2801	 * segmentation cut or at the end of the entire packet.
2802	 */
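	/*
	 * A small worked case of the retroactive fill-in, assuming a
	 * non-TSO skb that maps to 3 descriptors: count and rdma_count
	 * both reach 3, and the "(req - rdma_count)->rdma_count =
	 * rdma_count" store after the loop rewrites the first
	 * descriptor's count from 1 to 3.  For TSO the same store
	 * happens at every segment cut, with rdma_count reset after.
	 */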
2803
2804	while (1) {
2805		/* Break the SKB or Fragment up into pieces which
2806		 * do not cross mgp->tx_boundary */
2807		low = MYRI10GE_LOWPART_TO_U32(bus);
2808		high_swapped = htonl(MYRI10GE_HIGHPART_TO_U32(bus));
2809		while (len) {
2810			u8 flags_next;
2811			int cum_len_next;
2812
2813			if (unlikely(count == max_segments))
2814				goto abort_linearize;
2815
2816			boundary =
2817			    (low + mgp->tx_boundary) & ~(mgp->tx_boundary - 1);
2818			seglen = boundary - low;
2819			if (seglen > len)
2820				seglen = len;
2821			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
2822			cum_len_next = cum_len + seglen;
2823			if (mss) {	/* TSO */
2824				(req - rdma_count)->rdma_count = rdma_count + 1;
2825
2826				if (likely(cum_len >= 0)) {	/* payload */
2827					int next_is_first, chop;
2828
2829					chop = (cum_len_next > mss);
2830					cum_len_next = cum_len_next % mss;
2831					next_is_first = (cum_len_next == 0);
2832					flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
2833					flags_next |= next_is_first *
2834					    MXGEFW_FLAGS_FIRST;
2835					rdma_count |= -(chop | next_is_first);
2836					rdma_count += chop & !next_is_first;
2837				} else if (likely(cum_len_next >= 0)) {	/* header ends */
2838					int small;
2839
2840					rdma_count = -1;
2841					cum_len_next = 0;
2842					seglen = -cum_len;
2843					small = (mss <= MXGEFW_SEND_SMALL_SIZE);
2844					flags_next = MXGEFW_FLAGS_TSO_PLD |
2845					    MXGEFW_FLAGS_FIRST |
2846					    (small * MXGEFW_FLAGS_SMALL);
2847				}
2848			}
2849			req->addr_high = high_swapped;
2850			req->addr_low = htonl(low);
2851			req->pseudo_hdr_offset = htons(pseudo_hdr_offset);
2852			req->pad = 0;	/* complete solid 16-byte block; does this matter? */
2853			req->rdma_count = 1;
2854			req->length = htons(seglen);
2855			req->cksum_offset = cksum_offset;
2856			req->flags = flags | ((cum_len & 1) * odd_flag);
2857
2858			low += seglen;
2859			len -= seglen;
2860			cum_len = cum_len_next;
2861			flags = flags_next;
2862			req++;
2863			count++;
2864			rdma_count++;
2865			if (cksum_offset != 0 && !(mss && skb_is_gso_v6(skb))) {
2866				if (unlikely(cksum_offset > seglen))
2867					cksum_offset -= seglen;
2868				else
2869					cksum_offset = 0;
2870			}
2871		}
2872		if (frag_idx == frag_cnt)
2873			break;
2874
2875		/* map next fragment for DMA */
2876		idx = (count + tx->req) & tx->mask;
2877		frag = &skb_shinfo(skb)->frags[frag_idx];
2878		frag_idx++;
2879		len = frag->size;
2880		bus = pci_map_page(mgp->pdev, frag->page, frag->page_offset,
2881				   len, PCI_DMA_TODEVICE);
2882		dma_unmap_addr_set(&tx->info[idx], bus, bus);
2883		dma_unmap_len_set(&tx->info[idx], len, len);
2884	}
2885
2886	(req - rdma_count)->rdma_count = rdma_count;
2887	if (mss)
2888		do {
2889			req--;
2890			req->flags |= MXGEFW_FLAGS_TSO_LAST;
2891		} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP |
2892					 MXGEFW_FLAGS_FIRST)));
2893	idx = ((count - 1) + tx->req) & tx->mask;
2894	tx->info[idx].last = 1;
2895	myri10ge_submit_req(tx, tx->req_list, count);
2896	/* if using multiple tx queues, make sure NIC polls the
2897	 * current slice */
2898	if ((mgp->dev->real_num_tx_queues > 1) && tx->queue_active == 0) {
2899		tx->queue_active = 1;
2900		put_be32(htonl(1), tx->send_go);
2901		mb();
2902		mmiowb();
2903	}
2904	tx->pkt_start++;
2905	if ((avail - count) < MXGEFW_MAX_SEND_DESC) {
2906		tx->stop_queue++;
2907		netif_tx_stop_queue(netdev_queue);
2908	}
2909	return NETDEV_TX_OK;
2910
2911abort_linearize:
2912	/* Free any DMA resources we've alloced and clear out the skb
2913	 * slot so as to not trip up assertions, and to avoid a
2914	 * double-free if linearizing fails */
2915
2916	last_idx = (idx + 1) & tx->mask;
2917	idx = tx->req & tx->mask;
2918	tx->info[idx].skb = NULL;
2919	do {
2920		len = dma_unmap_len(&tx->info[idx], len);
2921		if (len) {
2922			if (tx->info[idx].skb != NULL)
2923				pci_unmap_single(mgp->pdev,
2924						 dma_unmap_addr(&tx->info[idx],
2925								bus), len,
2926						 PCI_DMA_TODEVICE);
2927			else
2928				pci_unmap_page(mgp->pdev,
2929					       dma_unmap_addr(&tx->info[idx],
2930							      bus), len,
2931					       PCI_DMA_TODEVICE);
2932			dma_unmap_len_set(&tx->info[idx], len, 0);
2933			tx->info[idx].skb = NULL;
2934		}
2935		idx = (idx + 1) & tx->mask;
2936	} while (idx != last_idx);
2937	if (skb_is_gso(skb)) {
2938		netdev_err(mgp->dev, "TSO but wanted to linearize?!?!?\n");
2939		goto drop;
2940	}
2941
2942	if (skb_linearize(skb))
2943		goto drop;
2944
2945	tx->linearized++;
2946	goto again;
2947
2948drop:
2949	dev_kfree_skb_any(skb);
2950	ss->stats.tx_dropped += 1;
2951	return NETDEV_TX_OK;
2952
2953}
2954
2955static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb,
2956					 struct net_device *dev)
2957{
2958	struct sk_buff *segs, *curr;
2959	struct myri10ge_priv *mgp = netdev_priv(dev);
2960	struct myri10ge_slice_state *ss;
2961	netdev_tx_t status;
2962
2963	segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO6);
2964	if (IS_ERR(segs))
2965		goto drop;
2966
2967	while (segs) {
2968		curr = segs;
2969		segs = segs->next;
2970		curr->next = NULL;
2971		status = myri10ge_xmit(curr, dev);
2972		if (status != 0) {
2973			dev_kfree_skb_any(curr);
2974			if (segs != NULL) {
2975				curr = segs;
2976				segs = segs->next;
2977				curr->next = NULL;
2978				dev_kfree_skb_any(segs);
2979			}
2980			goto drop;
2981		}
2982	}
2983	dev_kfree_skb_any(skb);
2984	return NETDEV_TX_OK;
2985
2986drop:
2987	ss = &mgp->ss[skb_get_queue_mapping(skb)];
2988	dev_kfree_skb_any(skb);
2989	ss->stats.tx_dropped += 1;
2990	return NETDEV_TX_OK;
2991}
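/*
 * This software-TSO fallback is reached from myri10ge_xmit() only
 * when an IPv6 TSO header exceeds the firmware's max_tso6 limit:
 * skb_gso_segment() (with TSO6 masked out of the features) splits the
 * frame into ordinary MTU-sized skbs, each re-submitted through the
 * normal transmit path.
 */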
2992
2993static struct net_device_stats *myri10ge_get_stats(struct net_device *dev)
2994{
2995	struct myri10ge_priv *mgp = netdev_priv(dev);
2996	struct myri10ge_slice_netstats *slice_stats;
2997	struct net_device_stats *stats = &dev->stats;
2998	int i;
2999
3000	spin_lock(&mgp->stats_lock);
3001	memset(stats, 0, sizeof(*stats));
3002	for (i = 0; i < mgp->num_slices; i++) {
3003		slice_stats = &mgp->ss[i].stats;
3004		stats->rx_packets += slice_stats->rx_packets;
3005		stats->tx_packets += slice_stats->tx_packets;
3006		stats->rx_bytes += slice_stats->rx_bytes;
3007		stats->tx_bytes += slice_stats->tx_bytes;
3008		stats->rx_dropped += slice_stats->rx_dropped;
3009		stats->tx_dropped += slice_stats->tx_dropped;
3010	}
3011	spin_unlock(&mgp->stats_lock);
3012	return stats;
3013}
3014
3015static void myri10ge_set_multicast_list(struct net_device *dev)
3016{
3017	struct myri10ge_priv *mgp = netdev_priv(dev);
3018	struct myri10ge_cmd cmd;
3019	struct netdev_hw_addr *ha;
3020	__be32 data[2] = { 0, 0 };
3021	int err;
3022
3023	/* can be called from atomic contexts,
3024	 * pass 1 to force atomicity in myri10ge_send_cmd() */
3025	myri10ge_change_promisc(mgp, dev->flags & IFF_PROMISC, 1);
3026
3027	/* This firmware is known to not support multicast */
3028	if (!mgp->fw_multicast_support)
3029		return;
3030
3031	/* Disable multicast filtering */
3032
3033	err = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_ALLMULTI, &cmd, 1);
3034	if (err != 0) {
3035		netdev_err(dev, "Failed MXGEFW_ENABLE_ALLMULTI, error status: %d\n",
3036			   err);
3037		goto abort;
3038	}
3039
3040	if ((dev->flags & IFF_ALLMULTI) || mgp->adopted_rx_filter_bug) {
3041		/* request to disable multicast filtering, so quit here */
3042		return;
3043	}
3044
3045	/* Flush the filters */
3046
3047	err = myri10ge_send_cmd(mgp, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS,
3048				&cmd, 1);
3049	if (err != 0) {
3050		netdev_err(dev, "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, error status: %d\n",
3051			   err);
3052		goto abort;
3053	}
3054
3055	/* Walk the multicast list, and add each address */
3056	netdev_for_each_mc_addr(ha, dev) {
3057		memcpy(data, &ha->addr, 6);
3058		cmd.data0 = ntohl(data[0]);
3059		cmd.data1 = ntohl(data[1]);
3060		err = myri10ge_send_cmd(mgp, MXGEFW_JOIN_MULTICAST_GROUP,
3061					&cmd, 1);
3062
3063		if (err != 0) {
3064			netdev_err(dev, "Failed MXGEFW_JOIN_MULTICAST_GROUP, error status:%d %pM\n",
3065				   err, ha->addr);
3066			goto abort;
3067		}
3068	}
3069	/* Enable multicast filtering */
3070	err = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_ALLMULTI, &cmd, 1);
3071	if (err != 0) {
3072		netdev_err(dev, "Failed MXGEFW_DISABLE_ALLMULTI, error status: %d\n",
3073			   err);
3074		goto abort;
3075	}
3076
3077	return;
3078
3079abort:
3080	return;
3081}
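/*
 * Ordering note for the multicast update above: ALLMULTI is switched
 * on before the old filter list is flushed and the new list walked,
 * so there is no window in which a group the host still wants is
 * filtered out; filtering is re-enabled (MXGEFW_DISABLE_ALLMULTI)
 * only after every address has been re-joined.
 */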
3082
3083static int myri10ge_set_mac_address(struct net_device *dev, void *addr)
3084{
3085	struct sockaddr *sa = addr;
3086	struct myri10ge_priv *mgp = netdev_priv(dev);
3087	int status;
3088
3089	if (!is_valid_ether_addr(sa->sa_data))
3090		return -EADDRNOTAVAIL;
3091
3092	status = myri10ge_update_mac_address(mgp, sa->sa_data);
3093	if (status != 0) {
3094		netdev_err(dev, "changing mac address failed with %d\n",
3095			   status);
3096		return status;
3097	}
3098
3099	/* change the dev structure */
3100	memcpy(dev->dev_addr, sa->sa_data, 6);
3101	return 0;
3102}
3103
3104static int myri10ge_change_mtu(struct net_device *dev, int new_mtu)
3105{
3106	struct myri10ge_priv *mgp = netdev_priv(dev);
3107	int error = 0;
3108
3109	if ((new_mtu < 68) || (ETH_HLEN + new_mtu > MYRI10GE_MAX_ETHER_MTU)) {
3110		netdev_err(dev, "new mtu (%d) is not valid\n", new_mtu);
3111		return -EINVAL;
3112	}
3113	netdev_info(dev, "changing mtu from %d to %d\n", dev->mtu, new_mtu);
3114	if (mgp->running) {
3115		/* if we change the mtu on an active device, we must
3116		 * reset the device so the firmware sees the change */
3117		myri10ge_close(dev);
3118		dev->mtu = new_mtu;
3119		myri10ge_open(dev);
3120	} else
3121		dev->mtu = new_mtu;
3122
3123	return error;
3124}
3125
3126/*
3127 * Enable ECRC to align PCI-E Completion packets on an 8-byte boundary.
3128 * Only do it if the bridge is a root port since we don't want to disturb
3129 * any other device, except if forced with myri10ge_ecrc_enable > 1.
3130 */
3131
3132static void myri10ge_enable_ecrc(struct myri10ge_priv *mgp)
3133{
3134	struct pci_dev *bridge = mgp->pdev->bus->self;
3135	struct device *dev = &mgp->pdev->dev;
3136	unsigned cap;
3137	unsigned err_cap;
3138	u16 val;
3139	u8 ext_type;
3140	int ret;
3141
3142	if (!myri10ge_ecrc_enable || !bridge)
3143		return;
3144
3145	/* check that the bridge is a root port */
3146	cap = pci_find_capability(bridge, PCI_CAP_ID_EXP);
3147	pci_read_config_word(bridge, cap + PCI_CAP_FLAGS, &val);
3148	ext_type = (val & PCI_EXP_FLAGS_TYPE) >> 4;
3149	if (ext_type != PCI_EXP_TYPE_ROOT_PORT) {
3150		if (myri10ge_ecrc_enable > 1) {
3151			struct pci_dev *prev_bridge, *old_bridge = bridge;
3152
3153			/* Walk the hierarchy up to the root port
3154			 * where ECRC has to be enabled */
3155			do {
3156				prev_bridge = bridge;
3157				bridge = bridge->bus->self;
3158				if (!bridge || prev_bridge == bridge) {
3159					dev_err(dev,
3160						"Failed to find root port"
3161						" to force ECRC\n");
3162					return;
3163				}
3164				cap =
3165				    pci_find_capability(bridge, PCI_CAP_ID_EXP);
3166				pci_read_config_word(bridge,
3167						     cap + PCI_CAP_FLAGS, &val);
3168				ext_type = (val & PCI_EXP_FLAGS_TYPE) >> 4;
3169			} while (ext_type != PCI_EXP_TYPE_ROOT_PORT);
3170
3171			dev_info(dev,
3172				 "Forcing ECRC on non-root port %s"
3173				 " (enabling on root port %s)\n",
3174				 pci_name(old_bridge), pci_name(bridge));
3175		} else {
3176			dev_err(dev,
3177				"Not enabling ECRC on non-root port %s\n",
3178				pci_name(bridge));
3179			return;
3180		}
3181	}
3182
3183	cap = pci_find_ext_capability(bridge, PCI_EXT_CAP_ID_ERR);
3184	if (!cap)
3185		return;
3186
3187	ret = pci_read_config_dword(bridge, cap + PCI_ERR_CAP, &err_cap);
3188	if (ret) {
3189		dev_err(dev, "failed reading ext-conf-space of %s\n",
3190			pci_name(bridge));
3191		dev_err(dev, "\t pci=nommconf in use? "
3192			"or buggy/incomplete/absent ACPI MCFG attr?\n");
3193		return;
3194	}
3195	if (!(err_cap & PCI_ERR_CAP_ECRC_GENC))
3196		return;
3197
3198	err_cap |= PCI_ERR_CAP_ECRC_GENE;
3199	pci_write_config_dword(bridge, cap + PCI_ERR_CAP, err_cap);
3200	dev_info(dev, "Enabled ECRC on upstream bridge %s\n", pci_name(bridge));
3201}
3202
3203/*
3204 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
3205 * when the PCI-E Completion packets are aligned on an 8-byte
3206 * boundary.  Some PCI-E chip sets always align Completion packets; on
3207 * the ones that do not, the alignment can be enforced by enabling
3208 * ECRC generation (if supported).
3209 *
3210 * When PCI-E Completion packets are not aligned, it is actually more
3211 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
3212 *
3213 * If the driver can neither enable ECRC nor verify that it has
3214 * already been enabled, then it must use a firmware image which works
3215 * around unaligned completion packets (myri10ge_rss_ethp_z8e.dat), and it
3216 * should also ensure that it never gives the device a Read-DMA which is
3217 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
3218 * enabled, then the driver should use the aligned (myri10ge_rss_eth_z8e.dat)
3219 * firmware image, and set tx_boundary to 4KB.
3220 */
3221
3222static void myri10ge_firmware_probe(struct myri10ge_priv *mgp)
3223{
3224	struct pci_dev *pdev = mgp->pdev;
3225	struct device *dev = &pdev->dev;
3226	int status;
3227
3228	mgp->tx_boundary = 4096;
3229	/*
3230	 * Verify the max read request size was set to 4KB
3231	 * before trying the test with 4KB.
3232	 */
3233	status = pcie_get_readrq(pdev);
3234	if (status < 0) {
3235		dev_err(dev, "Couldn't read max read req size: %d\n", status);
3236		goto abort;
3237	}
3238	if (status != 4096) {
3239		dev_warn(dev, "Max Read Request size != 4096 (%d)\n", status);
3240		mgp->tx_boundary = 2048;
3241	}
3242	/*
3243	 * load the optimized firmware (which assumes aligned PCIe
3244	 * completions) in order to see if it works on this host.
3245	 */
3246	set_fw_name(mgp, myri10ge_fw_aligned, false);
3247	status = myri10ge_load_firmware(mgp, 1);
3248	if (status != 0) {
3249		goto abort;
3250	}
3251
3252	/*
3253	 * Enable ECRC if possible
3254	 */
3255	myri10ge_enable_ecrc(mgp);
3256
3257	/*
3258	 * Run a DMA test which watches for unaligned completions and
3259	 * aborts on the first one seen.
3260	 */
3261
3262	status = myri10ge_dma_test(mgp, MXGEFW_CMD_UNALIGNED_TEST);
3263	if (status == 0)
3264		return;		/* keep the aligned firmware */
3265
3266	if (status != -E2BIG)
3267		dev_warn(dev, "DMA test failed: %d\n", status);
3268	if (status == -ENOSYS)
3269		dev_warn(dev, "Falling back to ethp! "
3270			 "Please install up to date fw\n");
3271abort:
3272	/* fall back to using the unaligned firmware */
3273	mgp->tx_boundary = 2048;
3274	set_fw_name(mgp, myri10ge_fw_unaligned, false);
3275
3276}
3277
3278static void myri10ge_select_firmware(struct myri10ge_priv *mgp)
3279{
3280	int overridden = 0;
3281
3282	if (myri10ge_force_firmware == 0) {
3283		int link_width, exp_cap;
3284		u16 lnk;
3285
3286		exp_cap = pci_find_capability(mgp->pdev, PCI_CAP_ID_EXP);
3287		pci_read_config_word(mgp->pdev, exp_cap + PCI_EXP_LNKSTA, &lnk);
3288		link_width = (lnk >> 4) & 0x3f;
3289
3290		/* Check to see if Link is less than 8 or if the
3291		 * upstream bridge is known to provide aligned
3292		 * completions */
3293		if (link_width < 8) {
3294			dev_info(&mgp->pdev->dev, "PCIE x%d Link\n",
3295				 link_width);
3296			mgp->tx_boundary = 4096;
3297			set_fw_name(mgp, myri10ge_fw_aligned, false);
3298		} else {
3299			myri10ge_firmware_probe(mgp);
3300		}
3301	} else {
3302		if (myri10ge_force_firmware == 1) {
3303			dev_info(&mgp->pdev->dev,
3304				 "Assuming aligned completions (forced)\n");
3305			mgp->tx_boundary = 4096;
3306			set_fw_name(mgp, myri10ge_fw_aligned, false);
3307		} else {
3308			dev_info(&mgp->pdev->dev,
3309				 "Assuming unaligned completions (forced)\n");
3310			mgp->tx_boundary = 2048;
3311			set_fw_name(mgp, myri10ge_fw_unaligned, false);
3312		}
3313	}
3314
3315	kparam_block_sysfs_write(myri10ge_fw_name);
3316	if (myri10ge_fw_name != NULL) {
3317		char *fw_name = kstrdup(myri10ge_fw_name, GFP_KERNEL);
3318		if (fw_name) {
3319			overridden = 1;
3320			set_fw_name(mgp, fw_name, true);
3321		}
3322	}
3323	kparam_unblock_sysfs_write(myri10ge_fw_name);
3324
3325	if (mgp->board_number < MYRI10GE_MAX_BOARDS &&
3326	    myri10ge_fw_names[mgp->board_number] != NULL &&
3327	    strlen(myri10ge_fw_names[mgp->board_number])) {
3328		set_fw_name(mgp, myri10ge_fw_names[mgp->board_number], false);
3329		overridden = 1;
3330	}
3331	if (overridden)
3332		dev_info(&mgp->pdev->dev, "overriding firmware to %s\n",
3333			 mgp->fw_name);
3334}
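/*
 * Firmware-name precedence, as implemented above: the probe/force
 * logic picks a default (aligned vs. unaligned), the myri10ge_fw_name
 * module parameter overrides that, and a per-board myri10ge_fw_names
 * entry overrides both; the final name is logged whenever an override
 * fires.
 */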
3335
3336#ifdef CONFIG_PM
3337static int myri10ge_suspend(struct pci_dev *pdev, pm_message_t state)
3338{
3339	struct myri10ge_priv *mgp;
3340	struct net_device *netdev;
3341
3342	mgp = pci_get_drvdata(pdev);
3343	if (mgp == NULL)
3344		return -EINVAL;
3345	netdev = mgp->dev;
3346
3347	netif_device_detach(netdev);
3348	if (netif_running(netdev)) {
3349		netdev_info(netdev, "closing\n");
3350		rtnl_lock();
3351		myri10ge_close(netdev);
3352		rtnl_unlock();
3353	}
3354	myri10ge_dummy_rdma(mgp, 0);
3355	pci_save_state(pdev);
3356	pci_disable_device(pdev);
3357
3358	return pci_set_power_state(pdev, pci_choose_state(pdev, state));
3359}
3360
3361static int myri10ge_resume(struct pci_dev *pdev)
3362{
3363	struct myri10ge_priv *mgp;
3364	struct net_device *netdev;
3365	int status;
3366	u16 vendor;
3367
3368	mgp = pci_get_drvdata(pdev);
3369	if (mgp == NULL)
3370		return -EINVAL;
3371	netdev = mgp->dev;
3372	pci_set_power_state(pdev, 0);	/* zeros conf space as a side effect */
3373	msleep(5);		/* give card time to respond */
3374	pci_read_config_word(mgp->pdev, PCI_VENDOR_ID, &vendor);
3375	if (vendor == 0xffff) {
3376		netdev_err(mgp->dev, "device disappeared!\n");
3377		return -EIO;
3378	}
3379
3380	status = pci_restore_state(pdev);
3381	if (status)
3382		return status;
3383
3384	status = pci_enable_device(pdev);
3385	if (status) {
3386		dev_err(&pdev->dev, "failed to enable device\n");
3387		return status;
3388	}
3389
3390	pci_set_master(pdev);
3391
3392	myri10ge_reset(mgp);
3393	myri10ge_dummy_rdma(mgp, 1);
3394
3395	/* Save configuration space to be restored if the
3396	 * nic resets due to a parity error */
3397	pci_save_state(pdev);
3398
3399	if (netif_running(netdev)) {
3400		rtnl_lock();
3401		status = myri10ge_open(netdev);
3402		rtnl_unlock();
3403		if (status != 0)
3404			goto abort_with_enabled;
3405
3406	}
3407	netif_device_attach(netdev);
3408
3409	return 0;
3410
3411abort_with_enabled:
3412	pci_disable_device(pdev);
3413	return -EIO;
3414
3415}
3416#endif				/* CONFIG_PM */
3417
3418static u32 myri10ge_read_reboot(struct myri10ge_priv *mgp)
3419{
3420	struct pci_dev *pdev = mgp->pdev;
3421	int vs = mgp->vendor_specific_offset;
3422	u32 reboot;
3423
3424	/* enter read32 mode */
3425	pci_write_config_byte(pdev, vs + 0x10, 0x3);
3426
3427	/* read REBOOT_STATUS (0xfffffff0) */
3428	pci_write_config_dword(pdev, vs + 0x18, 0xfffffff0);
3429	pci_read_config_dword(pdev, vs + 0x14, &reboot);
3430	return reboot;
3431}
3432
3433/*
3434 * This watchdog is used to check whether the board has suffered
3435 * from a parity error and needs to be recovered.
3436 */
3437static void myri10ge_watchdog(struct work_struct *work)
3438{
3439	struct myri10ge_priv *mgp =
3440	    container_of(work, struct myri10ge_priv, watchdog_work);
3441	struct myri10ge_tx_buf *tx;
3442	u32 reboot;
3443	int status, rebooted;
3444	int i;
3445	u16 cmd, vendor;
3446
3447	mgp->watchdog_resets++;
3448	pci_read_config_word(mgp->pdev, PCI_COMMAND, &cmd);
3449	rebooted = 0;
3450	if ((cmd & PCI_COMMAND_MASTER) == 0) {
3451		/* Bus master DMA disabled?  Check to see
3452		 * if the card rebooted due to a parity error.
3453		 * For now, just report it */
3454		reboot = myri10ge_read_reboot(mgp);
3455		netdev_err(mgp->dev, "NIC rebooted (0x%x),%s resetting\n",
3456			   reboot,
3457			   myri10ge_reset_recover ? "" : " not");
3458		if (myri10ge_reset_recover == 0)
3459			return;
3460		rtnl_lock();
3461		mgp->rebooted = 1;
3462		rebooted = 1;
3463		myri10ge_close(mgp->dev);
3464		myri10ge_reset_recover--;
3465		mgp->rebooted = 0;
3466		/*
3467		 * A rebooted nic will come back with config space as
3468		 * it was after power was applied to PCIe bus.
3469		 * Attempt to restore config space which was saved
3470		 * when the driver was loaded, or the last time the
3471		 * nic was resumed from power saving mode.
3472		 */
3473		pci_restore_state(mgp->pdev);
3474
3475		/* save state again for accounting reasons */
3476		pci_save_state(mgp->pdev);
3477
3478	} else {
3479		/* if we get back -1's from our slot, perhaps somebody
3480		 * powered off our card.  Don't try to reset it in
3481		 * this case */
3482		if (cmd == 0xffff) {
3483			pci_read_config_word(mgp->pdev, PCI_VENDOR_ID, &vendor);
3484			if (vendor == 0xffff) {
3485				netdev_err(mgp->dev, "device disappeared!\n");
3486				return;
3487			}
3488		}
3489		/* Perhaps it is a software error.  Try to reset */
3490
3491		netdev_err(mgp->dev, "device timeout, resetting\n");
3492		for (i = 0; i < mgp->num_slices; i++) {
3493			tx = &mgp->ss[i].tx;
3494			netdev_err(mgp->dev, "(%d): %d %d %d %d %d %d\n",
3495				   i, tx->queue_active, tx->req,
3496				   tx->done, tx->pkt_start, tx->pkt_done,
3497				   (int)ntohl(mgp->ss[i].fw_stats->
3498					      send_done_count));
3499			msleep(2000);
3500			netdev_info(mgp->dev, "(%d): %d %d %d %d %d %d\n",
3501				    i, tx->queue_active, tx->req,
3502				    tx->done, tx->pkt_start, tx->pkt_done,
3503				    (int)ntohl(mgp->ss[i].fw_stats->
3504					       send_done_count));
3505		}
3506	}
3507
3508	if (!rebooted) {
3509		rtnl_lock();
3510		myri10ge_close(mgp->dev);
3511	}
3512	status = myri10ge_load_firmware(mgp, 1);
3513	if (status != 0)
3514		netdev_err(mgp->dev, "failed to load firmware\n");
3515	else
3516		myri10ge_open(mgp->dev);
3517	rtnl_unlock();
3518}
3519
3520/*
3521 * We use our own timer routine rather than relying upon
3522 * netdev->tx_timeout because we have a very large hardware transmit
3523 * queue.  Due to the large queue, the netdev->tx_timeout function
3524 * cannot detect a NIC with a parity error in a timely fashion if the
3525 * NIC is lightly loaded.
3526 */
3527static void myri10ge_watchdog_timer(unsigned long arg)
3528{
3529	struct myri10ge_priv *mgp;
3530	struct myri10ge_slice_state *ss;
3531	int i, reset_needed, busy_slice_cnt;
3532	u32 rx_pause_cnt;
3533	u16 cmd;
3534
3535	mgp = (struct myri10ge_priv *)arg;
3536
3537	rx_pause_cnt = ntohl(mgp->ss[0].fw_stats->dropped_pause);
3538	busy_slice_cnt = 0;
3539	for (i = 0, reset_needed = 0;
3540	     i < mgp->num_slices && reset_needed == 0; ++i) {
3541
3542		ss = &mgp->ss[i];
3543		if (ss->rx_small.watchdog_needed) {
3544			myri10ge_alloc_rx_pages(mgp, &ss->rx_small,
3545						mgp->small_bytes + MXGEFW_PAD,
3546						1);
3547			if (ss->rx_small.fill_cnt - ss->rx_small.cnt >=
3548			    myri10ge_fill_thresh)
3549				ss->rx_small.watchdog_needed = 0;
3550		}
3551		if (ss->rx_big.watchdog_needed) {
3552			myri10ge_alloc_rx_pages(mgp, &ss->rx_big,
3553						mgp->big_bytes, 1);
3554			if (ss->rx_big.fill_cnt - ss->rx_big.cnt >=
3555			    myri10ge_fill_thresh)
3556				ss->rx_big.watchdog_needed = 0;
3557		}
3558
3559		if (ss->tx.req != ss->tx.done &&
3560		    ss->tx.done == ss->watchdog_tx_done &&
3561		    ss->watchdog_tx_req != ss->watchdog_tx_done) {
3562			/* the NIC seems to be stuck */
3563			if (rx_pause_cnt != mgp->watchdog_pause) {
3564				if (net_ratelimit())
3565					netdev_err(mgp->dev, "slice %d: TX paused, check link partner\n",
3566						   i);
3567			} else {
3568				netdev_warn(mgp->dev, "slice %d stuck:", i);
3569				reset_needed = 1;
3570			}
3571		}
3572		if (ss->watchdog_tx_done != ss->tx.done ||
3573		    ss->watchdog_rx_done != ss->rx_done.cnt) {
3574			busy_slice_cnt++;
3575		}
3576		ss->watchdog_tx_done = ss->tx.done;
3577		ss->watchdog_tx_req = ss->tx.req;
3578		ss->watchdog_rx_done = ss->rx_done.cnt;
3579	}
3580	/* if we've sent or received no traffic, poll the NIC to
3581	 * ensure it is still there.  Otherwise, we risk not noticing
3582	 * an error in a timely fashion */
3583	if (busy_slice_cnt == 0) {
3584		pci_read_config_word(mgp->pdev, PCI_COMMAND, &cmd);
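		/* a rebooted NIC comes back with power-up config space,
		 * in which bus mastering is disabled, so a cleared
		 * bus-master bit is a strong hint that the NIC reset
		 * itself */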
3585		if ((cmd & PCI_COMMAND_MASTER) == 0) {
3586			reset_needed = 1;
3587		}
3588	}
3589	mgp->watchdog_pause = rx_pause_cnt;
3590
3591	if (reset_needed) {
3592		schedule_work(&mgp->watchdog_work);
3593	} else {
3594		/* rearm timer */
3595		mod_timer(&mgp->watchdog_timer,
3596			  jiffies + myri10ge_watchdog_timeout * HZ);
3597	}
3598}
3599
3600static void myri10ge_free_slices(struct myri10ge_priv *mgp)
3601{
3602	struct myri10ge_slice_state *ss;
3603	struct pci_dev *pdev = mgp->pdev;
3604	size_t bytes;
3605	int i;
3606
3607	if (mgp->ss == NULL)
3608		return;
3609
3610	for (i = 0; i < mgp->num_slices; i++) {
3611		ss = &mgp->ss[i];
3612		if (ss->rx_done.entry != NULL) {
3613			bytes = mgp->max_intr_slots *
3614			    sizeof(*ss->rx_done.entry);
3615			dma_free_coherent(&pdev->dev, bytes,
3616					  ss->rx_done.entry, ss->rx_done.bus);
3617			ss->rx_done.entry = NULL;
3618		}
3619		if (ss->fw_stats != NULL) {
3620			bytes = sizeof(*ss->fw_stats);
3621			dma_free_coherent(&pdev->dev, bytes,
3622					  ss->fw_stats, ss->fw_stats_bus);
3623			ss->fw_stats = NULL;
3624		}
3625	}
3626	kfree(mgp->ss);
3627	mgp->ss = NULL;
3628}
3629
3630static int myri10ge_alloc_slices(struct myri10ge_priv *mgp)
3631{
3632	struct myri10ge_slice_state *ss;
3633	struct pci_dev *pdev = mgp->pdev;
3634	size_t bytes;
3635	int i;
3636
3637	bytes = sizeof(*mgp->ss) * mgp->num_slices;
3638	mgp->ss = kzalloc(bytes, GFP_KERNEL);
3639	if (mgp->ss == NULL)
3640		return -ENOMEM;
3642
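	/* the receive-completion ring and the firmware stats block are
	 * written by the NIC via DMA, hence the coherent allocations */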
3643	for (i = 0; i < mgp->num_slices; i++) {
3644		ss = &mgp->ss[i];
3645		bytes = mgp->max_intr_slots * sizeof(*ss->rx_done.entry);
3646		ss->rx_done.entry = dma_alloc_coherent(&pdev->dev, bytes,
3647						       &ss->rx_done.bus,
3648						       GFP_KERNEL);
3649		if (ss->rx_done.entry == NULL)
3650			goto abort;
3651		memset(ss->rx_done.entry, 0, bytes);
3652		bytes = sizeof(*ss->fw_stats);
3653		ss->fw_stats = dma_alloc_coherent(&pdev->dev, bytes,
3654						  &ss->fw_stats_bus,
3655						  GFP_KERNEL);
3656		if (ss->fw_stats == NULL)
3657			goto abort;
3658		ss->mgp = mgp;
3659		ss->dev = mgp->dev;
3660		netif_napi_add(ss->dev, &ss->napi, myri10ge_poll,
3661			       myri10ge_napi_weight);
3662	}
3663	return 0;
3664abort:
3665	myri10ge_free_slices(mgp);
3666	return -ENOMEM;
3667}
3668
3669/*
3670 * This function determines the number of slices supported.
3671 * The number of slices is the minimum of the number of CPUs,
3672 * the number of MSI-X IRQs supported, and the number of slices
3673 * supported by the firmware.
3674 */
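/*
 * Worked example (illustrative only): an 8-CPU host whose firmware
 * reports 16 RSS queues and which can allocate at least 8 MSI-X
 * vectors ends up with min(8, 16) = 8 slices, already a power of two.
 */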
3675static void myri10ge_probe_slices(struct myri10ge_priv *mgp)
3676{
3677	struct myri10ge_cmd cmd;
3678	struct pci_dev *pdev = mgp->pdev;
3679	char *old_fw;
3680	bool old_allocated;
3681	int i, status, ncpus, msix_cap;
3682
3683	mgp->num_slices = 1;
3684	msix_cap = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
3685	ncpus = num_online_cpus();
3686
3687	if (myri10ge_max_slices == 1 || msix_cap == 0 ||
3688	    (myri10ge_max_slices == -1 && ncpus < 2))
3689		return;
3690
3691	/* try to load the slice aware rss firmware */
3692	old_fw = mgp->fw_name;
3693	old_allocated = mgp->fw_name_allocated;
3694	/* don't free old_fw if we override it. */
3695	mgp->fw_name_allocated = false;
3696
3697	if (myri10ge_fw_name != NULL) {
3698		dev_info(&mgp->pdev->dev, "overriding rss firmware to %s\n",
3699			 myri10ge_fw_name);
3700		set_fw_name(mgp, myri10ge_fw_name, false);
3701	} else if (old_fw == myri10ge_fw_aligned)
3702		set_fw_name(mgp, myri10ge_fw_rss_aligned, false);
3703	else
3704		set_fw_name(mgp, myri10ge_fw_rss_unaligned, false);
3705	status = myri10ge_load_firmware(mgp, 0);
3706	if (status != 0) {
3707		dev_info(&pdev->dev, "Rss firmware not found\n");
3708		if (old_allocated)
3709			kfree(old_fw);
3710		return;
3711	}
3712
3713	/* hit the board with a reset to ensure it is alive */
3714	memset(&cmd, 0, sizeof(cmd));
3715	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd, 0);
3716	if (status != 0) {
3717		dev_err(&mgp->pdev->dev, "failed reset\n");
3718		goto abort_with_fw;
3719	}
3720
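	/* the reset command reports the interrupt queue size, in bytes,
	 * in data0; convert it to a number of mcp_slot entries */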
3721	mgp->max_intr_slots = cmd.data0 / sizeof(struct mcp_slot);
3722
3723	/* tell it the size of the interrupt queues */
3724	cmd.data0 = mgp->max_intr_slots * sizeof(struct mcp_slot);
3725	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd, 0);
3726	if (status != 0) {
3727		dev_err(&mgp->pdev->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
3728		goto abort_with_fw;
3729	}
3730
3731	/* ask for the maximum number of slices the firmware supports */
3732	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd, 0);
3733	if (status != 0)
3734		goto abort_with_fw;
3735	mgp->num_slices = cmd.data0;
3737
3738	/* Only allow multiple slices if the myri10ge_msi module
3739	 * parameter has not disabled MSI/MSI-X */
3740	if (!myri10ge_msi)
3741		goto abort_with_fw;
3742
3743	/* if the admin did not specify a limit to how many
3744	 * slices we should use, cap it automatically to the
3745	 * number of CPUs currently online */
3746	if (myri10ge_max_slices == -1)
3747		myri10ge_max_slices = ncpus;
3748
3749	if (mgp->num_slices > myri10ge_max_slices)
3750		mgp->num_slices = myri10ge_max_slices;
3751
3752	/* Now try to allocate as many MSI-X vectors as we have
3753	 * slices. We give up on MSI-X if we can only get a single
3754	 * vector. */
3755
3756	mgp->msix_vectors = kzalloc(mgp->num_slices *
3757				    sizeof(*mgp->msix_vectors), GFP_KERNEL);
3758	if (mgp->msix_vectors == NULL)
3759		goto disable_msix;
3760	for (i = 0; i < mgp->num_slices; i++) {
3761		mgp->msix_vectors[i].entry = i;
3762	}
3763
3764	while (mgp->num_slices > 1) {
3765		/* make sure it is a power of two */
3766		while (!is_power_of_2(mgp->num_slices))
3767			mgp->num_slices--;
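		/* equivalent, since linux/log2.h is already included:
		 * mgp->num_slices = rounddown_pow_of_two(mgp->num_slices); */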
3768		if (mgp->num_slices == 1)
3769			goto disable_msix;
3770		status = pci_enable_msix(pdev, mgp->msix_vectors,
3771					 mgp->num_slices);
3772		if (status == 0) {
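			/* success; release the vectors again, since this
			 * path only probes availability: MSI-X is
			 * re-enabled when the interface is brought up */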
3773			pci_disable_msix(pdev);
3774			if (old_allocated)
3775				kfree(old_fw);
3776			return;
3777		}
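		/* pci_enable_msix() returns 0 on success, a negative
		 * errno on failure, or the number of vectors actually
		 * available when it cannot satisfy the request, in
		 * which case we shrink the request and retry */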
3778		if (status > 0)
3779			mgp->num_slices = status;
3780		else
3781			goto disable_msix;
3782	}
3783
3784disable_msix:
3785	kfree(mgp->msix_vectors);	/* kfree(NULL) is a no-op */
3786	mgp->msix_vectors = NULL;
3789
3790abort_with_fw:
3791	mgp->num_slices = 1;
3792	set_fw_name(mgp, old_fw, old_allocated);
3793	myri10ge_load_firmware(mgp, 0);
3794}
3795
3796static const struct net_device_ops myri10ge_netdev_ops = {
3797	.ndo_open		= myri10ge_open,
3798	.ndo_stop		= myri10ge_close,
3799	.ndo_start_xmit		= myri10ge_xmit,
3800	.ndo_get_stats		= myri10ge_get_stats,
3801	.ndo_validate_addr	= eth_validate_addr,
3802	.ndo_change_mtu		= myri10ge_change_mtu,
3803	.ndo_set_multicast_list = myri10ge_set_multicast_list,
3804	.ndo_set_mac_address	= myri10ge_set_mac_address,
3805};
3806
3807static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
3808{
3809	struct net_device *netdev;
3810	struct myri10ge_priv *mgp;
3811	struct device *dev = &pdev->dev;
3813	int status = -ENXIO;
3814	int dac_enabled;
3815	unsigned hdr_offset, ss_offset;
3816	static int board_number;
3817
3818	netdev = alloc_etherdev_mq(sizeof(*mgp), MYRI10GE_MAX_SLICES);
3819	if (netdev == NULL) {
3820		dev_err(dev, "Could not allocate ethernet device\n");
3821		return -ENOMEM;
3822	}
3823
3824	SET_NETDEV_DEV(netdev, &pdev->dev);
3825
3826	mgp = netdev_priv(netdev);
3827	mgp->dev = netdev;
3828	mgp->pdev = pdev;
3829	mgp->csum_flag = MXGEFW_FLAGS_CKSUM;
3830	mgp->pause = myri10ge_flow_control;
3831	mgp->intr_coal_delay = myri10ge_intr_coal_delay;
3832	mgp->msg_enable = netif_msg_init(myri10ge_debug, MYRI10GE_MSG_DEFAULT);
3833	mgp->board_number = board_number;
3834	init_waitqueue_head(&mgp->down_wq);
3835
3836	if (pci_enable_device(pdev)) {
3837		dev_err(&pdev->dev, "pci_enable_device call failed\n");
3838		status = -ENODEV;
3839		goto abort_with_netdev;
3840	}
3841
3842	/* Find the vendor-specific cap so we can check
3843	 * the reboot register later on */
3844	mgp->vendor_specific_offset
3845	    = pci_find_capability(pdev, PCI_CAP_ID_VNDR);
3846
3847	/* Set our max read request to 4KB */
3848	status = pcie_set_readrq(pdev, 4096);
3849	if (status != 0) {
3850		dev_err(&pdev->dev, "Error %d writing PCI_EXP_DEVCTL\n",
3851			status);
3852		goto abort_with_enabled;
3853	}
3854
3855	pci_set_master(pdev);
3856	dac_enabled = 1;
3857	status = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
3858	if (status != 0) {
3859		dac_enabled = 0;
3860		dev_err(&pdev->dev,
3861			"64-bit pci address mask was refused, "
3862			"trying 32-bit\n");
3863		status = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
3864	}
3865	if (status != 0) {
3866		dev_err(&pdev->dev, "Error %d setting DMA mask\n", status);
3867		goto abort_with_enabled;
3868	}
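	/* ignore failure here: if the 64-bit coherent mask is refused,
	 * coherent allocations keep the default 32-bit mask */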
3869	(void)pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
3870	mgp->cmd = dma_alloc_coherent(&pdev->dev, sizeof(*mgp->cmd),
3871				      &mgp->cmd_bus, GFP_KERNEL);
3872	if (mgp->cmd == NULL)
3873		goto abort_with_enabled;
3874
3875	mgp->board_span = pci_resource_len(pdev, 0);
3876	mgp->iomem_base = pci_resource_start(pdev, 0);
3877	mgp->mtrr = -1;
3878	mgp->wc_enabled = 0;
3879#ifdef CONFIG_MTRR
3880	mgp->mtrr = mtrr_add(mgp->iomem_base, mgp->board_span,
3881			     MTRR_TYPE_WRCOMB, 1);
3882	if (mgp->mtrr >= 0)
3883		mgp->wc_enabled = 1;
3884#endif
3885	mgp->sram = ioremap_wc(mgp->iomem_base, mgp->board_span);
3886	if (mgp->sram == NULL) {
3887		dev_err(&pdev->dev, "ioremap failed for %ld bytes at 0x%lx\n",
3888			mgp->board_span, mgp->iomem_base);
3889		status = -ENXIO;
3890		goto abort_with_mtrr;
3891	}
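	/* the MCP header pointer is stored big-endian in SRAM; the mask
	 * keeps the offset 4-byte aligned within a 1MB window */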
3892	hdr_offset =
3893	    ntohl(__raw_readl(mgp->sram + MCP_HEADER_PTR_OFFSET)) & 0xffffc;
3894	ss_offset = hdr_offset + offsetof(struct mcp_gen_header, string_specs);
3895	mgp->sram_size = ntohl(__raw_readl(mgp->sram + ss_offset));
3896	if (mgp->sram_size > mgp->board_span ||
3897	    mgp->sram_size <= MYRI10GE_FW_OFFSET) {
3898		dev_err(&pdev->dev,
3899			"invalid sram_size %dB or board span %ldB\n",
3900			mgp->sram_size, mgp->board_span);
3901		goto abort_with_ioremap;
3902	}
3903	memcpy_fromio(mgp->eeprom_strings,
3904		      mgp->sram + mgp->sram_size, MYRI10GE_EEPROM_STRINGS_SIZE);
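	/* zero the last two bytes so the string block is always
	 * NUL-terminated, even if the EEPROM contents are garbage */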
3905	memset(mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE - 2, 0, 2);
3906	status = myri10ge_read_mac_addr(mgp);
3907	if (status)
3908		goto abort_with_ioremap;
3909
3910	memcpy(netdev->dev_addr, mgp->mac_addr, ETH_ALEN);
3912
3913	myri10ge_select_firmware(mgp);
3914
3915	status = myri10ge_load_firmware(mgp, 1);
3916	if (status != 0) {
3917		dev_err(&pdev->dev, "failed to load firmware\n");
3918		goto abort_with_ioremap;
3919	}
3920	myri10ge_probe_slices(mgp);
3921	status = myri10ge_alloc_slices(mgp);
3922	if (status != 0) {
3923		dev_err(&pdev->dev, "failed to alloc slice state\n");
3924		goto abort_with_firmware;
3925	}
3926	netdev->real_num_tx_queues = mgp->num_slices;
3927	status = myri10ge_reset(mgp);
3928	if (status != 0) {
3929		dev_err(&pdev->dev, "failed reset\n");
3930		goto abort_with_slices;
3931	}
3932#ifdef CONFIG_MYRI10GE_DCA
3933	myri10ge_setup_dca(mgp);
3934#endif
3935	pci_set_drvdata(pdev, mgp);
3936	if ((myri10ge_initial_mtu + ETH_HLEN) > MYRI10GE_MAX_ETHER_MTU)
3937		myri10ge_initial_mtu = MYRI10GE_MAX_ETHER_MTU - ETH_HLEN;
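	/* never go below 68 bytes, the minimum IPv4 MTU (RFC 791) */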
3938	if ((myri10ge_initial_mtu + ETH_HLEN) < 68)
3939		myri10ge_initial_mtu = 68;
3940
3941	netdev->netdev_ops = &myri10ge_netdev_ops;
3942	netdev->mtu = myri10ge_initial_mtu;
3943	netdev->base_addr = mgp->iomem_base;
3944	netdev->features = mgp->features;
3945
3946	if (dac_enabled)
3947		netdev->features |= NETIF_F_HIGHDMA;
3948	netdev->features |= NETIF_F_LRO;
3949
3950	netdev->vlan_features |= mgp->features;
3951	if (mgp->fw_ver_tiny < 37)
3952		netdev->vlan_features &= ~NETIF_F_TSO6;
3953	if (mgp->fw_ver_tiny < 32)
3954		netdev->vlan_features &= ~NETIF_F_TSO;
3955
3956	/* make sure we can get an irq, and that MSI can be
3957	 * set up (if available).  Also ensure netdev->irq is
3958	 * set to the correct value if MSI is enabled */
3959	status = myri10ge_request_irq(mgp);
3960	if (status != 0)
3961		goto abort_with_firmware;
3962	netdev->irq = pdev->irq;
3963	myri10ge_free_irq(mgp);
3964
3965	/* Save configuration space to be restored if the
3966	 * nic resets due to a parity error */
3967	pci_save_state(pdev);
3968
3969	/* Setup the watchdog timer */
3970	setup_timer(&mgp->watchdog_timer, myri10ge_watchdog_timer,
3971		    (unsigned long)mgp);
3972
3973	spin_lock_init(&mgp->stats_lock);
3974	SET_ETHTOOL_OPS(netdev, &myri10ge_ethtool_ops);
3975	INIT_WORK(&mgp->watchdog_work, myri10ge_watchdog);
3976	status = register_netdev(netdev);
3977	if (status != 0) {
3978		dev_err(&pdev->dev, "register_netdev failed: %d\n", status);
3979		goto abort_with_state;
3980	}
3981	if (mgp->msix_enabled)
3982		dev_info(dev, "%d MSI-X IRQs, tx bndry %d, fw %s, WC %s\n",
3983			 mgp->num_slices, mgp->tx_boundary, mgp->fw_name,
3984			 (mgp->wc_enabled ? "Enabled" : "Disabled"));
3985	else
3986		dev_info(dev, "%s IRQ %d, tx bndry %d, fw %s, WC %s\n",
3987			 mgp->msi_enabled ? "MSI" : "xPIC",
3988			 netdev->irq, mgp->tx_boundary, mgp->fw_name,
3989			 (mgp->wc_enabled ? "Enabled" : "Disabled"));
3990
3991	board_number++;
3992	return 0;
3993
3994abort_with_state:
3995	pci_restore_state(pdev);
3996
3997abort_with_slices:
3998	myri10ge_free_slices(mgp);
3999
4000abort_with_firmware:
4001	myri10ge_dummy_rdma(mgp, 0);
4002
4003abort_with_ioremap:
4004	if (mgp->mac_addr_string != NULL)
4005		dev_err(&pdev->dev,
4006			"myri10ge_probe() failed: MAC=%s, SN=%ld\n",
4007			mgp->mac_addr_string, mgp->serial_number);
4008	iounmap(mgp->sram);
4009
4010abort_with_mtrr:
4011#ifdef CONFIG_MTRR
4012	if (mgp->mtrr >= 0)
4013		mtrr_del(mgp->mtrr, mgp->iomem_base, mgp->board_span);
4014#endif
4015	dma_free_coherent(&pdev->dev, sizeof(*mgp->cmd),
4016			  mgp->cmd, mgp->cmd_bus);
4017
4018abort_with_enabled:
4019	pci_disable_device(pdev);
4020
4021abort_with_netdev:
4022	set_fw_name(mgp, NULL, false);
4023	free_netdev(netdev);
4024	return status;
4025}
4026
4027/*
4028 * myri10ge_remove
4029 *
4030 * Does what is necessary to shut down one Myrinet device. Called
4031 *   once for each Myrinet card by the kernel when the module is
4032 *   unloaded.
4033 */
4034static void myri10ge_remove(struct pci_dev *pdev)
4035{
4036	struct myri10ge_priv *mgp;
4037	struct net_device *netdev;
4038
4039	mgp = pci_get_drvdata(pdev);
4040	if (mgp == NULL)
4041		return;
4042
4043	flush_scheduled_work();
4044	netdev = mgp->dev;
4045	unregister_netdev(netdev);
4046
4047#ifdef CONFIG_MYRI10GE_DCA
4048	myri10ge_teardown_dca(mgp);
4049#endif
4050	myri10ge_dummy_rdma(mgp, 0);
4051
4052	/* avoid a memory leak by restoring the state saved in
4053	 * myri10ge_probe() */
4053	pci_restore_state(pdev);
4054
4055	iounmap(mgp->sram);
4056
4057#ifdef CONFIG_MTRR
4058	if (mgp->mtrr >= 0)
4059		mtrr_del(mgp->mtrr, mgp->iomem_base, mgp->board_span);
4060#endif
4061	myri10ge_free_slices(mgp);
4062	kfree(mgp->msix_vectors);	/* kfree(NULL) is a no-op */
4064	dma_free_coherent(&pdev->dev, sizeof(*mgp->cmd),
4065			  mgp->cmd, mgp->cmd_bus);
4066
4067	set_fw_name(mgp, NULL, false);
4068	free_netdev(netdev);
4069	pci_disable_device(pdev);
4070	pci_set_drvdata(pdev, NULL);
4071}
4072
4073#define PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E 	0x0008
4074#define PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E_9	0x0009
4075
4076static DEFINE_PCI_DEVICE_TABLE(myri10ge_pci_tbl) = {
4077	{PCI_DEVICE(PCI_VENDOR_ID_MYRICOM, PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E)},
4078	{PCI_DEVICE
4079	 (PCI_VENDOR_ID_MYRICOM, PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E_9)},
4080	{0},
4081};
4082
4083MODULE_DEVICE_TABLE(pci, myri10ge_pci_tbl);
4084
4085static struct pci_driver myri10ge_driver = {
4086	.name = "myri10ge",
4087	.probe = myri10ge_probe,
4088	.remove = myri10ge_remove,
4089	.id_table = myri10ge_pci_tbl,
4090#ifdef CONFIG_PM
4091	.suspend = myri10ge_suspend,
4092	.resume = myri10ge_resume,
4093#endif
4094};
4095
4096#ifdef CONFIG_MYRI10GE_DCA
4097static int
4098myri10ge_notify_dca(struct notifier_block *nb, unsigned long event, void *p)
4099{
4100	int err = driver_for_each_device(&myri10ge_driver.driver,
4101					 NULL, &event,
4102					 myri10ge_notify_dca_device);
4103
4104	if (err)
4105		return NOTIFY_BAD;
4106	return NOTIFY_DONE;
4107}
4108
4109static struct notifier_block myri10ge_dca_notifier = {
4110	.notifier_call = myri10ge_notify_dca,
4111	.next = NULL,
4112	.priority = 0,
4113};
4114#endif				/* CONFIG_MYRI10GE_DCA */
4115
4116static __init int myri10ge_init_module(void)
4117{
4118	pr_info("Version %s\n", MYRI10GE_VERSION_STR);
4119
4120	if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX) {
4121		pr_err("Illegal rss hash type %d, defaulting to source port\n",
4122		       myri10ge_rss_hash);
4123		myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
4124	}
4125#ifdef CONFIG_MYRI10GE_DCA
4126	dca_register_notify(&myri10ge_dca_notifier);
4127#endif
4128	if (myri10ge_max_slices > MYRI10GE_MAX_SLICES)
4129		myri10ge_max_slices = MYRI10GE_MAX_SLICES;
4130
4131	return pci_register_driver(&myri10ge_driver);
4132}
4133
4134module_init(myri10ge_init_module);
4135
4136static __exit void myri10ge_cleanup_module(void)
4137{
4138#ifdef CONFIG_MYRI10GE_DCA
4139	dca_unregister_notify(&myri10ge_dca_notifier);
4140#endif
4141	pci_unregister_driver(&myri10ge_driver);
4142}
4143
4144module_exit(myri10ge_cleanup_module);
4145