/****************************************************************************
 * Driver for Solarflare Solarstorm network controllers and boards
 * Copyright 2005-2006 Fen Systems Ltd.
 * Copyright 2005-2009 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <linux/pci.h>
#include <linux/tcp.h>
#include <linux/ip.h>
#include <linux/in.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/ipv6.h>
#include <linux/if_ether.h>
#include <linux/highmem.h>
#include "net_driver.h"
#include "efx.h"
#include "nic.h"
#include "workarounds.h"

/*
 * TX descriptor ring full threshold
 *
 * The tx_queue descriptor ring fill-level must fall below this value
 * before we restart the netif queue
 */
#define EFX_TXQ_THRESHOLD (EFX_TXQ_MASK / 2u)

/* We need to be able to nest calls to netif_tx_stop_queue(), partly
 * because of the 2 hardware queues associated with each core queue,
 * but also so that we can inhibit TX for reasons other than a full
 * hardware queue. */
void efx_stop_queue(struct efx_channel *channel)
{
	struct efx_nic *efx = channel->efx;

	if (!channel->tx_queue)
		return;

	spin_lock_bh(&channel->tx_stop_lock);
	netif_vdbg(efx, tx_queued, efx->net_dev, "stop TX queue\n");

	atomic_inc(&channel->tx_stop_count);
	netif_tx_stop_queue(
		netdev_get_tx_queue(
			efx->net_dev,
			channel->tx_queue->queue / EFX_TXQ_TYPES));

	spin_unlock_bh(&channel->tx_stop_lock);
}

/* Decrement core TX queue stop count and wake it if the count is 0 */
void efx_wake_queue(struct efx_channel *channel)
{
	struct efx_nic *efx = channel->efx;

	if (!channel->tx_queue)
		return;

	local_bh_disable();
	if (atomic_dec_and_lock(&channel->tx_stop_count,
				&channel->tx_stop_lock)) {
		netif_vdbg(efx, tx_queued, efx->net_dev, "waking TX queue\n");
		netif_tx_wake_queue(
			netdev_get_tx_queue(
				efx->net_dev,
				channel->tx_queue->queue / EFX_TXQ_TYPES));
		spin_unlock(&channel->tx_stop_lock);
	}
	local_bh_enable();
}

static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
			       struct efx_tx_buffer *buffer)
{
	if (buffer->unmap_len) {
		struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
		dma_addr_t unmap_addr = (buffer->dma_addr + buffer->len -
					 buffer->unmap_len);
		if (buffer->unmap_single)
			pci_unmap_single(pci_dev, unmap_addr, buffer->unmap_len,
					 PCI_DMA_TODEVICE);
		else
			pci_unmap_page(pci_dev, unmap_addr, buffer->unmap_len,
				       PCI_DMA_TODEVICE);
		buffer->unmap_len = 0;
		buffer->unmap_single = false;
	}

	if (buffer->skb) {
		dev_kfree_skb_any((struct sk_buff *) buffer->skb);
		buffer->skb = NULL;
		netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
			   "TX queue %d transmission id %x complete\n",
			   tx_queue->queue, tx_queue->read_count);
	}
}

/**
 * struct efx_tso_header - a DMA mapped buffer for packet headers
 * @next: Linked list of free ones.
 *	The list is protected by the TX queue lock.
 * @unmap_len: Length to unmap for an oversize buffer, or 0.
 * @dma_addr: The DMA address of the header below.
 *
 * This controls the memory used for a TSO header.  Use TSOH_BUFFER()
 * to find the packet header data.  Use TSOH_SIZE() to calculate the
 * total size required for a given packet header length.  TSO headers
 * in the free list are exactly %TSOH_STD_SIZE bytes in size.
 */
struct efx_tso_header {
	union {
		struct efx_tso_header *next;
		size_t unmap_len;
	};
	dma_addr_t dma_addr;
};

static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
			       struct sk_buff *skb);
static void efx_fini_tso(struct efx_tx_queue *tx_queue);
static void efx_tsoh_heap_free(struct efx_tx_queue *tx_queue,
			       struct efx_tso_header *tsoh);

static void efx_tsoh_free(struct efx_tx_queue *tx_queue,
			  struct efx_tx_buffer *buffer)
{
	if (buffer->tsoh) {
		if (likely(!buffer->tsoh->unmap_len)) {
			buffer->tsoh->next = tx_queue->tso_headers_free;
			tx_queue->tso_headers_free = buffer->tsoh;
		} else {
			efx_tsoh_heap_free(tx_queue, buffer->tsoh);
		}
		buffer->tsoh = NULL;
	}
}


static inline unsigned
efx_max_tx_len(struct efx_nic *efx, dma_addr_t dma_addr)
{
	/* Depending on the NIC revision, we can use descriptor
	 * lengths up to 8K or 8K-1.  However, since PCI Express
	 * devices must split read requests at 4K boundaries, there is
	 * little benefit from using descriptors that cross those
	 * boundaries and we keep things simple by not doing so.
	 */
	unsigned len = (~dma_addr & 0xfff) + 1;

	if (EFX_WORKAROUND_5391(efx) && (dma_addr & 0xf))
		len = min_t(unsigned, len, 512 - (dma_addr & 0xf));

	return len;
}

/*
 * Add a socket buffer to a TX queue
 *
 * This maps all fragments of a socket buffer for DMA and adds them to
 * the TX queue.  The queue's insert pointer will be incremented by
 * the number of fragments in the socket buffer.
 *
 * If any DMA mapping fails, any mapped fragments will be unmapped and
 * the queue's insert pointer will be restored to its original value.
 *
 * This function is split out from efx_hard_start_xmit to allow the
 * loopback test to direct packets via specific TX queues.
 *
 * Returns NETDEV_TX_OK or NETDEV_TX_BUSY
 * You must hold netif_tx_lock() to call this function.
 */
netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
{
	struct efx_nic *efx = tx_queue->efx;
	struct pci_dev *pci_dev = efx->pci_dev;
	struct efx_tx_buffer *buffer;
	skb_frag_t *fragment;
	struct page *page;
	int page_offset;
	unsigned int len, unmap_len = 0, fill_level, insert_ptr;
	dma_addr_t dma_addr, unmap_addr = 0;
	unsigned int dma_len;
	bool unmap_single;
	int q_space, i = 0;
	netdev_tx_t rc = NETDEV_TX_OK;

	EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);

	if (skb_shinfo(skb)->gso_size)
		return efx_enqueue_skb_tso(tx_queue, skb);

	/* Get size of the initial fragment */
	len = skb_headlen(skb);

	/* Pad if necessary */
	if (EFX_WORKAROUND_15592(efx) && skb->len <= 32) {
		EFX_BUG_ON_PARANOID(skb->data_len);
		len = 32 + 1;
		if (skb_pad(skb, len - skb->len))
			return NETDEV_TX_OK;
	}

	fill_level = tx_queue->insert_count - tx_queue->old_read_count;
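	/* -1 as there is no way to represent all descriptors used */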
	q_space = EFX_TXQ_MASK - 1 - fill_level;

	/* Map for DMA.  Use pci_map_single rather than pci_map_page
	 * since this is more efficient on machines with sparse
	 * memory.
	 */
	unmap_single = true;
	dma_addr = pci_map_single(pci_dev, skb->data, len, PCI_DMA_TODEVICE);

	/* Process all fragments */
	while (1) {
		if (unlikely(pci_dma_mapping_error(pci_dev, dma_addr)))
			goto pci_err;

		/* Store fields for marking in the per-fragment final
		 * descriptor */
		unmap_len = len;
		unmap_addr = dma_addr;

		/* Add to TX queue, splitting across DMA boundaries */
		do {
			if (unlikely(q_space-- <= 0)) {
				/* It might be that completions have
				 * happened since the xmit path last
				 * checked.  Update the xmit path's
				 * copy of read_count.
				 */
				++tx_queue->stopped;
				/* This memory barrier protects the
				 * change of stopped from the access
				 * of read_count. */
				smp_mb();
				tx_queue->old_read_count =
					*(volatile unsigned *)
					&tx_queue->read_count;
				fill_level = (tx_queue->insert_count
					      - tx_queue->old_read_count);
				q_space = EFX_TXQ_MASK - 1 - fill_level;
				if (unlikely(q_space-- <= 0))
					goto stop;
				smp_mb();
				--tx_queue->stopped;
			}

			insert_ptr = tx_queue->insert_count & EFX_TXQ_MASK;
			buffer = &tx_queue->buffer[insert_ptr];
			efx_tsoh_free(tx_queue, buffer);
			EFX_BUG_ON_PARANOID(buffer->tsoh);
			EFX_BUG_ON_PARANOID(buffer->skb);
			EFX_BUG_ON_PARANOID(buffer->len);
			EFX_BUG_ON_PARANOID(!buffer->continuation);
			EFX_BUG_ON_PARANOID(buffer->unmap_len);

			dma_len = efx_max_tx_len(efx, dma_addr);
			if (likely(dma_len >= len))
				dma_len = len;

			/* Fill out per descriptor fields */
			buffer->len = dma_len;
			buffer->dma_addr = dma_addr;
			len -= dma_len;
			dma_addr += dma_len;
			++tx_queue->insert_count;
		} while (len);

		/* Transfer ownership of the unmapping to the final buffer */
		buffer->unmap_single = unmap_single;
		buffer->unmap_len = unmap_len;
		unmap_len = 0;

		/* Get address and size of next fragment */
		if (i >= skb_shinfo(skb)->nr_frags)
			break;
		fragment = &skb_shinfo(skb)->frags[i];
		len = fragment->size;
		page = fragment->page;
		page_offset = fragment->page_offset;
		i++;
		/* Map for DMA */
		unmap_single = false;
		dma_addr = pci_map_page(pci_dev, page, page_offset, len,
					PCI_DMA_TODEVICE);
	}

	/* Transfer ownership of the skb to the final buffer */
	buffer->skb = skb;
	buffer->continuation = false;

	/* Pass off to hardware */
	efx_nic_push_buffers(tx_queue);

	return NETDEV_TX_OK;

 pci_err:
	netif_err(efx, tx_err, efx->net_dev,
		  " TX queue %d could not map skb with %d bytes %d "
		  "fragments for DMA\n", tx_queue->queue, skb->len,
		  skb_shinfo(skb)->nr_frags + 1);

	/* Mark the packet as transmitted, and free the SKB ourselves */
	dev_kfree_skb_any(skb);
	goto unwind;

 stop:
	rc = NETDEV_TX_BUSY;

	if (tx_queue->stopped == 1)
		efx_stop_queue(tx_queue->channel);

 unwind:
	/* Work backwards until we hit the original insert pointer value */
	while (tx_queue->insert_count != tx_queue->write_count) {
		--tx_queue->insert_count;
		insert_ptr = tx_queue->insert_count & EFX_TXQ_MASK;
		buffer = &tx_queue->buffer[insert_ptr];
		efx_dequeue_buffer(tx_queue, buffer);
		buffer->len = 0;
	}

	/* Free the fragment we were mid-way through pushing */
	if (unmap_len) {
		if (unmap_single)
			pci_unmap_single(pci_dev, unmap_addr, unmap_len,
					 PCI_DMA_TODEVICE);
		else
			pci_unmap_page(pci_dev, unmap_addr, unmap_len,
				       PCI_DMA_TODEVICE);
	}

	return rc;
}

/* Remove packets from the TX queue
 *
 * This removes packets from the TX queue, up to and including the
 * specified index.
 */
static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
				unsigned int index)
{
	struct efx_nic *efx = tx_queue->efx;
	unsigned int stop_index, read_ptr;

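	/* @index is inclusive, so stop_index is one slot past the last
	 * completed buffer */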
	stop_index = (index + 1) & EFX_TXQ_MASK;
	read_ptr = tx_queue->read_count & EFX_TXQ_MASK;

	while (read_ptr != stop_index) {
		struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
		if (unlikely(buffer->len == 0)) {
			netif_err(efx, tx_err, efx->net_dev,
				  "TX queue %d spurious TX completion id %x\n",
				  tx_queue->queue, read_ptr);
			efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
			return;
		}

		efx_dequeue_buffer(tx_queue, buffer);
		buffer->continuation = true;
		buffer->len = 0;

		++tx_queue->read_count;
		read_ptr = tx_queue->read_count & EFX_TXQ_MASK;
	}
}

/* Initiate a packet transmission.  We use one channel per CPU
 * (sharing when we have more CPUs than channels).  On Falcon, the TX
 * completion events will be directed back to the CPU that transmitted
 * the packet, which should be cache-efficient.
 *
 * Context: non-blocking.
 * Note that returning anything other than NETDEV_TX_OK will cause the
 * OS to free the skb.
 */
netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
				      struct net_device *net_dev)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	struct efx_tx_queue *tx_queue;

	if (unlikely(efx->port_inhibited))
		return NETDEV_TX_BUSY;

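	/* Each core queue maps to a pair of hardware queues; select the
	 * checksum-offload queue of the pair when the skb needs it. */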
	tx_queue = &efx->tx_queue[EFX_TXQ_TYPES * skb_get_queue_mapping(skb)];
	if (likely(skb->ip_summed == CHECKSUM_PARTIAL))
		tx_queue += EFX_TXQ_TYPE_OFFLOAD;

	return efx_enqueue_skb(tx_queue, skb);
}

void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
{
	unsigned fill_level;
	struct efx_nic *efx = tx_queue->efx;

	EFX_BUG_ON_PARANOID(index > EFX_TXQ_MASK);

	efx_dequeue_buffers(tx_queue, index);

	/* See if we need to restart the netif queue.  This barrier
	 * separates the update of read_count from the test of
	 * stopped. */
	smp_mb();
	if (unlikely(tx_queue->stopped) && likely(efx->port_enabled)) {
		fill_level = tx_queue->insert_count - tx_queue->read_count;
		if (fill_level < EFX_TXQ_THRESHOLD) {
			EFX_BUG_ON_PARANOID(!efx_dev_registered(efx));

			/* Do this under netif_tx_lock(), to avoid racing
			 * with efx_xmit(). */
			netif_tx_lock(efx->net_dev);
			if (tx_queue->stopped) {
				tx_queue->stopped = 0;
				efx_wake_queue(tx_queue->channel);
			}
			netif_tx_unlock(efx->net_dev);
		}
	}
}

int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
{
	struct efx_nic *efx = tx_queue->efx;
	unsigned int txq_size;
	int i, rc;

	netif_dbg(efx, probe, efx->net_dev, "creating TX queue %d\n",
		  tx_queue->queue);

	/* Allocate software ring */
	txq_size = EFX_TXQ_SIZE * sizeof(*tx_queue->buffer);
	tx_queue->buffer = kzalloc(txq_size, GFP_KERNEL);
	if (!tx_queue->buffer)
		return -ENOMEM;
	for (i = 0; i <= EFX_TXQ_MASK; ++i)
		tx_queue->buffer[i].continuation = true;

	/* Allocate hardware ring */
	rc = efx_nic_probe_tx(tx_queue);
	if (rc)
		goto fail;

	return 0;

 fail:
	kfree(tx_queue->buffer);
	tx_queue->buffer = NULL;
	return rc;
}

void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
{
	netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
		  "initialising TX queue %d\n", tx_queue->queue);

	tx_queue->insert_count = 0;
	tx_queue->write_count = 0;
	tx_queue->read_count = 0;
	tx_queue->old_read_count = 0;
	BUG_ON(tx_queue->stopped);

	/* Set up TX descriptor ring */
	efx_nic_init_tx(tx_queue);
}

void efx_release_tx_buffers(struct efx_tx_queue *tx_queue)
{
	struct efx_tx_buffer *buffer;

	if (!tx_queue->buffer)
		return;

	/* Free any buffers left in the ring */
	while (tx_queue->read_count != tx_queue->write_count) {
		buffer = &tx_queue->buffer[tx_queue->read_count & EFX_TXQ_MASK];
		efx_dequeue_buffer(tx_queue, buffer);
		buffer->continuation = true;
		buffer->len = 0;

		++tx_queue->read_count;
	}
}

void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
{
	netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
		  "shutting down TX queue %d\n", tx_queue->queue);

	/* Flush TX queue, remove descriptor ring */
	efx_nic_fini_tx(tx_queue);

	efx_release_tx_buffers(tx_queue);

	/* Free up TSO header cache */
	efx_fini_tso(tx_queue);

	/* Release queue's stop on port, if any */
	if (tx_queue->stopped) {
		tx_queue->stopped = 0;
		efx_wake_queue(tx_queue->channel);
	}
}

void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
{
	netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
		  "destroying TX queue %d\n", tx_queue->queue);
	efx_nic_remove_tx(tx_queue);

	kfree(tx_queue->buffer);
	tx_queue->buffer = NULL;
}


/* Efx TCP segmentation acceleration.
 *
 * Why?  Because by doing it here in the driver we can go significantly
 * faster than the GSO.
 *
 * Requires TX checksum offload support.
 */

/* Number of bytes inserted at the start of a TSO header buffer,
 * similar to NET_IP_ALIGN.
 */
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
#define TSOH_OFFSET	0
#else
#define TSOH_OFFSET	NET_IP_ALIGN
#endif

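/* Header data is stored immediately after the struct efx_tso_header
 * itself, offset by TSOH_OFFSET bytes of alignment padding */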
#define TSOH_BUFFER(tsoh)	((u8 *)(tsoh + 1) + TSOH_OFFSET)

/* Total size of struct efx_tso_header, buffer and padding */
#define TSOH_SIZE(hdr_len)					\
	(sizeof(struct efx_tso_header) + TSOH_OFFSET + hdr_len)

/* Size of blocks on free list.  Larger blocks must be allocated from
 * the heap.
 */
#define TSOH_STD_SIZE		128

#define PTR_DIFF(p1, p2)  ((u8 *)(p1) - (u8 *)(p2))
#define ETH_HDR_LEN(skb)  (skb_network_header(skb) - (skb)->data)
#define SKB_TCP_OFF(skb)  PTR_DIFF(tcp_hdr(skb), (skb)->data)
#define SKB_IPV4_OFF(skb) PTR_DIFF(ip_hdr(skb), (skb)->data)
#define SKB_IPV6_OFF(skb) PTR_DIFF(ipv6_hdr(skb), (skb)->data)

/**
 * struct tso_state - TSO state for an SKB
 * @out_len: Remaining length in current segment
 * @seqnum: Current sequence number
 * @ipv4_id: Current IPv4 ID, host endian
 * @packet_space: Remaining space in current packet
 * @dma_addr: DMA address of current position
 * @in_len: Remaining length in current SKB fragment
 * @unmap_len: Length of SKB fragment
 * @unmap_addr: DMA address of SKB fragment
 * @unmap_single: DMA single vs page mapping flag
 * @protocol: Network protocol (after any VLAN header)
 * @header_len: Number of bytes of header
 * @full_packet_size: Number of bytes to put in each outgoing segment
 *
 * The state used during segmentation.  It is put into this data structure
 * just to make it easy to pass into inline functions.
 */
struct tso_state {
	/* Output position */
	unsigned out_len;
	unsigned seqnum;
	unsigned ipv4_id;
	unsigned packet_space;

	/* Input position */
	dma_addr_t dma_addr;
	unsigned in_len;
	unsigned unmap_len;
	dma_addr_t unmap_addr;
	bool unmap_single;

	__be16 protocol;
	unsigned header_len;
	int full_packet_size;
};


/*
 * Verify that our various assumptions about sk_buffs and the conditions
 * under which TSO will be attempted hold true.  Return the protocol number.
 */
static __be16 efx_tso_check_protocol(struct sk_buff *skb)
{
	__be16 protocol = skb->protocol;

	EFX_BUG_ON_PARANOID(((struct ethhdr *)skb->data)->h_proto !=
			    protocol);
	if (protocol == htons(ETH_P_8021Q)) {
		/* Find the encapsulated protocol; reset network header
		 * and transport header based on that. */
		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
		protocol = veh->h_vlan_encapsulated_proto;
		skb_set_network_header(skb, sizeof(*veh));
		if (protocol == htons(ETH_P_IP))
			skb_set_transport_header(skb, sizeof(*veh) +
						 4 * ip_hdr(skb)->ihl);
		else if (protocol == htons(ETH_P_IPV6))
			skb_set_transport_header(skb, sizeof(*veh) +
						 sizeof(struct ipv6hdr));
	}

	if (protocol == htons(ETH_P_IP)) {
		EFX_BUG_ON_PARANOID(ip_hdr(skb)->protocol != IPPROTO_TCP);
	} else {
		EFX_BUG_ON_PARANOID(protocol != htons(ETH_P_IPV6));
		EFX_BUG_ON_PARANOID(ipv6_hdr(skb)->nexthdr != NEXTHDR_TCP);
	}
	EFX_BUG_ON_PARANOID((PTR_DIFF(tcp_hdr(skb), skb->data)
			     + (tcp_hdr(skb)->doff << 2u)) >
			    skb_headlen(skb));

	return protocol;
}


/*
 * Allocate a page worth of efx_tso_header structures, and string them
 * into the tx_queue->tso_headers_free linked list. Return 0 or -ENOMEM.
 */
static int efx_tsoh_block_alloc(struct efx_tx_queue *tx_queue)
{

	struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
	struct efx_tso_header *tsoh;
	dma_addr_t dma_addr;
	u8 *base_kva, *kva;

	base_kva = pci_alloc_consistent(pci_dev, PAGE_SIZE, &dma_addr);
	if (base_kva == NULL) {
		netif_err(tx_queue->efx, tx_err, tx_queue->efx->net_dev,
			  "Unable to allocate page for TSO headers\n");
		return -ENOMEM;
	}

	/* pci_alloc_consistent() allocates pages. */
	EFX_BUG_ON_PARANOID(dma_addr & (PAGE_SIZE - 1u));

	for (kva = base_kva; kva < base_kva + PAGE_SIZE; kva += TSOH_STD_SIZE) {
		tsoh = (struct efx_tso_header *)kva;
		tsoh->dma_addr = dma_addr + (TSOH_BUFFER(tsoh) - base_kva);
		tsoh->next = tx_queue->tso_headers_free;
		tx_queue->tso_headers_free = tsoh;
	}

	return 0;
}


/* Free up a TSO header, and all others in the same page. */
static void efx_tsoh_block_free(struct efx_tx_queue *tx_queue,
				struct efx_tso_header *tsoh,
				struct pci_dev *pci_dev)
{
	struct efx_tso_header **p;
	unsigned long base_kva;
	dma_addr_t base_dma;

	base_kva = (unsigned long)tsoh & PAGE_MASK;
	base_dma = tsoh->dma_addr & PAGE_MASK;

	p = &tx_queue->tso_headers_free;
	while (*p != NULL) {
		if (((unsigned long)*p & PAGE_MASK) == base_kva)
			*p = (*p)->next;
		else
			p = &(*p)->next;
	}

	pci_free_consistent(pci_dev, PAGE_SIZE, (void *)base_kva, base_dma);
}

static struct efx_tso_header *
efx_tsoh_heap_alloc(struct efx_tx_queue *tx_queue, size_t header_len)
{
	struct efx_tso_header *tsoh;

	tsoh = kmalloc(TSOH_SIZE(header_len), GFP_ATOMIC | GFP_DMA);
	if (unlikely(!tsoh))
		return NULL;

	tsoh->dma_addr = pci_map_single(tx_queue->efx->pci_dev,
					TSOH_BUFFER(tsoh), header_len,
					PCI_DMA_TODEVICE);
	if (unlikely(pci_dma_mapping_error(tx_queue->efx->pci_dev,
					   tsoh->dma_addr))) {
		kfree(tsoh);
		return NULL;
	}

	tsoh->unmap_len = header_len;
	return tsoh;
}

static void
efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh)
{
	pci_unmap_single(tx_queue->efx->pci_dev,
			 tsoh->dma_addr, tsoh->unmap_len,
			 PCI_DMA_TODEVICE);
	kfree(tsoh);
}

/**
 * efx_tx_queue_insert - push descriptors onto the TX queue
 * @tx_queue:		Efx TX queue
 * @dma_addr:		DMA address of fragment
 * @len:		Length of fragment
 * @final_buffer:	The final buffer inserted into the queue
 *
 * Push descriptors onto the TX queue.  Return 0 on success or 1 if
 * @tx_queue is full.
 */
static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
			       dma_addr_t dma_addr, unsigned len,
			       struct efx_tx_buffer **final_buffer)
{
	struct efx_tx_buffer *buffer;
	struct efx_nic *efx = tx_queue->efx;
	unsigned dma_len, fill_level, insert_ptr;
	int q_space;

	EFX_BUG_ON_PARANOID(len <= 0);

	fill_level = tx_queue->insert_count - tx_queue->old_read_count;
	/* -1 as there is no way to represent all descriptors used */
	q_space = EFX_TXQ_MASK - 1 - fill_level;

	while (1) {
		if (unlikely(q_space-- <= 0)) {
			/* It might be that completions have happened
			 * since the xmit path last checked.  Update
			 * the xmit path's copy of read_count.
			 */
			++tx_queue->stopped;
			/* This memory barrier protects the change of
			 * stopped from the access of read_count. */
			smp_mb();
			tx_queue->old_read_count =
				*(volatile unsigned *)&tx_queue->read_count;
			fill_level = (tx_queue->insert_count
				      - tx_queue->old_read_count);
			q_space = EFX_TXQ_MASK - 1 - fill_level;
			if (unlikely(q_space-- <= 0)) {
				*final_buffer = NULL;
				return 1;
			}
			smp_mb();
			--tx_queue->stopped;
		}

		insert_ptr = tx_queue->insert_count & EFX_TXQ_MASK;
		buffer = &tx_queue->buffer[insert_ptr];
		++tx_queue->insert_count;

		EFX_BUG_ON_PARANOID(tx_queue->insert_count -
				    tx_queue->read_count >
				    EFX_TXQ_MASK);

		efx_tsoh_free(tx_queue, buffer);
		EFX_BUG_ON_PARANOID(buffer->len);
		EFX_BUG_ON_PARANOID(buffer->unmap_len);
		EFX_BUG_ON_PARANOID(buffer->skb);
		EFX_BUG_ON_PARANOID(!buffer->continuation);
		EFX_BUG_ON_PARANOID(buffer->tsoh);

		buffer->dma_addr = dma_addr;

		dma_len = efx_max_tx_len(efx, dma_addr);

		/* If there is enough space to send then do so */
		if (dma_len >= len)
			break;

		buffer->len = dma_len; /* Don't set the other members */
		dma_addr += dma_len;
		len -= dma_len;
	}

	EFX_BUG_ON_PARANOID(!len);
	buffer->len = len;
	*final_buffer = buffer;
	return 0;
}


/*
 * Put a TSO header into the TX queue.
 *
 * This is special-cased because we know that it is small enough to fit in
 * a single fragment, and we know it doesn't cross a page boundary.  It
 * also allows us to not worry about end-of-packet etc.
 */
static void efx_tso_put_header(struct efx_tx_queue *tx_queue,
			       struct efx_tso_header *tsoh, unsigned len)
{
	struct efx_tx_buffer *buffer;

	buffer = &tx_queue->buffer[tx_queue->insert_count & EFX_TXQ_MASK];
	efx_tsoh_free(tx_queue, buffer);
	EFX_BUG_ON_PARANOID(buffer->len);
	EFX_BUG_ON_PARANOID(buffer->unmap_len);
	EFX_BUG_ON_PARANOID(buffer->skb);
	EFX_BUG_ON_PARANOID(!buffer->continuation);
	EFX_BUG_ON_PARANOID(buffer->tsoh);
	buffer->len = len;
	buffer->dma_addr = tsoh->dma_addr;
	buffer->tsoh = tsoh;

	++tx_queue->insert_count;
}


/* Remove descriptors put into a tx_queue. */
static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
{
	struct efx_tx_buffer *buffer;
	dma_addr_t unmap_addr;

	/* Work backwards until we hit the original insert pointer value */
	while (tx_queue->insert_count != tx_queue->write_count) {
		--tx_queue->insert_count;
		buffer = &tx_queue->buffer[tx_queue->insert_count &
					   EFX_TXQ_MASK];
		efx_tsoh_free(tx_queue, buffer);
		EFX_BUG_ON_PARANOID(buffer->skb);
		if (buffer->unmap_len) {
			unmap_addr = (buffer->dma_addr + buffer->len -
				      buffer->unmap_len);
			if (buffer->unmap_single)
				pci_unmap_single(tx_queue->efx->pci_dev,
						 unmap_addr, buffer->unmap_len,
						 PCI_DMA_TODEVICE);
			else
				pci_unmap_page(tx_queue->efx->pci_dev,
					       unmap_addr, buffer->unmap_len,
					       PCI_DMA_TODEVICE);
			buffer->unmap_len = 0;
		}
		buffer->len = 0;
		buffer->continuation = true;
	}
}


/* Parse the SKB header and initialise state. */
static void tso_start(struct tso_state *st, const struct sk_buff *skb)
{
	/* All ethernet/IP/TCP headers combined size is TCP header size
	 * plus offset of TCP header relative to start of packet.
	 */
	st->header_len = ((tcp_hdr(skb)->doff << 2u)
			  + PTR_DIFF(tcp_hdr(skb), skb->data));
	st->full_packet_size = st->header_len + skb_shinfo(skb)->gso_size;

	if (st->protocol == htons(ETH_P_IP))
		st->ipv4_id = ntohs(ip_hdr(skb)->id);
	else
		st->ipv4_id = 0;
	st->seqnum = ntohl(tcp_hdr(skb)->seq);

	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg);
	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->syn);
	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->rst);

	st->packet_space = st->full_packet_size;
	st->out_len = skb->len - st->header_len;
	st->unmap_len = 0;
	st->unmap_single = false;
}

static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
			    skb_frag_t *frag)
{
	st->unmap_addr = pci_map_page(efx->pci_dev, frag->page,
				      frag->page_offset, frag->size,
				      PCI_DMA_TODEVICE);
	if (likely(!pci_dma_mapping_error(efx->pci_dev, st->unmap_addr))) {
		st->unmap_single = false;
		st->unmap_len = frag->size;
		st->in_len = frag->size;
		st->dma_addr = st->unmap_addr;
		return 0;
	}
	return -ENOMEM;
}

static int tso_get_head_fragment(struct tso_state *st, struct efx_nic *efx,
				 const struct sk_buff *skb)
{
	int hl = st->header_len;
	int len = skb_headlen(skb) - hl;

	st->unmap_addr = pci_map_single(efx->pci_dev, skb->data + hl,
					len, PCI_DMA_TODEVICE);
	if (likely(!pci_dma_mapping_error(efx->pci_dev, st->unmap_addr))) {
		st->unmap_single = true;
		st->unmap_len = len;
		st->in_len = len;
		st->dma_addr = st->unmap_addr;
		return 0;
	}
	return -ENOMEM;
}


/**
 * tso_fill_packet_with_fragment - form descriptors for the current fragment
 * @tx_queue:		Efx TX queue
 * @skb:		Socket buffer
 * @st:			TSO state
 *
 * Form descriptors for the current fragment, until we reach the end
 * of fragment or end-of-packet.  Return 0 on success, 1 if not enough
 * space in @tx_queue.
 */
static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
					 const struct sk_buff *skb,
					 struct tso_state *st)
{
	struct efx_tx_buffer *buffer;
	int n, end_of_packet, rc;

	if (st->in_len == 0)
		return 0;
	if (st->packet_space == 0)
		return 0;

	EFX_BUG_ON_PARANOID(st->in_len <= 0);
	EFX_BUG_ON_PARANOID(st->packet_space <= 0);

	n = min(st->in_len, st->packet_space);

	st->packet_space -= n;
	st->out_len -= n;
	st->in_len -= n;

	rc = efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer);
	if (likely(rc == 0)) {
		if (st->out_len == 0)
			/* Transfer ownership of the skb */
			buffer->skb = skb;

		end_of_packet = st->out_len == 0 || st->packet_space == 0;
		buffer->continuation = !end_of_packet;

		if (st->in_len == 0) {
			/* Transfer ownership of the pci mapping */
			buffer->unmap_len = st->unmap_len;
			buffer->unmap_single = st->unmap_single;
			st->unmap_len = 0;
		}
	}

	st->dma_addr += n;
	return rc;
}


/**
 * tso_start_new_packet - generate a new header and prepare for the new packet
 * @tx_queue:		Efx TX queue
 * @skb:		Socket buffer
 * @st:			TSO state
 *
 * Generate a new header and prepare for the new packet.  Return 0 on
 * success, or -1 if failed to alloc header.
 */
static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
				const struct sk_buff *skb,
				struct tso_state *st)
{
	struct efx_tso_header *tsoh;
	struct tcphdr *tsoh_th;
	unsigned ip_length;
	u8 *header;

	/* Allocate a DMA-mapped header buffer. */
	if (likely(TSOH_SIZE(st->header_len) <= TSOH_STD_SIZE)) {
		if (tx_queue->tso_headers_free == NULL) {
			if (efx_tsoh_block_alloc(tx_queue))
				return -1;
		}
		EFX_BUG_ON_PARANOID(!tx_queue->tso_headers_free);
		tsoh = tx_queue->tso_headers_free;
		tx_queue->tso_headers_free = tsoh->next;
		tsoh->unmap_len = 0;
	} else {
		tx_queue->tso_long_headers++;
		tsoh = efx_tsoh_heap_alloc(tx_queue, st->header_len);
		if (unlikely(!tsoh))
			return -1;
	}

	header = TSOH_BUFFER(tsoh);
	tsoh_th = (struct tcphdr *)(header + SKB_TCP_OFF(skb));

	/* Copy and update the headers. */
	memcpy(header, skb->data, st->header_len);

	tsoh_th->seq = htonl(st->seqnum);
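	/* Each segment carries gso_size bytes of TCP payload, so advance
	 * the sequence number to be used in the next segment's header */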
	st->seqnum += skb_shinfo(skb)->gso_size;
	if (st->out_len > skb_shinfo(skb)->gso_size) {
		/* This packet will not finish the TSO burst. */
		ip_length = st->full_packet_size - ETH_HDR_LEN(skb);
		tsoh_th->fin = 0;
		tsoh_th->psh = 0;
	} else {
		/* This packet will be the last in the TSO burst. */
		ip_length = st->header_len - ETH_HDR_LEN(skb) + st->out_len;
		tsoh_th->fin = tcp_hdr(skb)->fin;
		tsoh_th->psh = tcp_hdr(skb)->psh;
	}

	if (st->protocol == htons(ETH_P_IP)) {
		struct iphdr *tsoh_iph =
			(struct iphdr *)(header + SKB_IPV4_OFF(skb));

		tsoh_iph->tot_len = htons(ip_length);

		/* Linux leaves suitable gaps in the IP ID space for us to fill. */
		tsoh_iph->id = htons(st->ipv4_id);
		st->ipv4_id++;
	} else {
		struct ipv6hdr *tsoh_iph =
			(struct ipv6hdr *)(header + SKB_IPV6_OFF(skb));

		tsoh_iph->payload_len = htons(ip_length - sizeof(*tsoh_iph));
	}

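	/* Reset the per-packet payload budget for the new segment */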
	st->packet_space = skb_shinfo(skb)->gso_size;
	++tx_queue->tso_packets;

	/* Form a descriptor for this header. */
	efx_tso_put_header(tx_queue, tsoh, st->header_len);

	return 0;
}


/**
 * efx_enqueue_skb_tso - segment and transmit a TSO socket buffer
 * @tx_queue:		Efx TX queue
 * @skb:		Socket buffer
 *
 * Context: You must hold netif_tx_lock() to call this function.
 *
 * Add socket buffer @skb to @tx_queue, doing TSO, or return non-zero if
 * @skb was not enqueued.  In all cases @skb is consumed.  Return
 * %NETDEV_TX_OK or %NETDEV_TX_BUSY.
 */
static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
			       struct sk_buff *skb)
{
	struct efx_nic *efx = tx_queue->efx;
	int frag_i, rc, rc2 = NETDEV_TX_OK;
	struct tso_state state;

	/* Find the packet protocol and sanity-check it */
	state.protocol = efx_tso_check_protocol(skb);

	EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);

	tso_start(&state, skb);

	/* Assume that skb header area contains exactly the headers, and
	 * all payload is in the frag list.
	 */
	if (skb_headlen(skb) == state.header_len) {
		/* Grab the first payload fragment. */
		EFX_BUG_ON_PARANOID(skb_shinfo(skb)->nr_frags < 1);
		frag_i = 0;
		rc = tso_get_fragment(&state, efx,
				      skb_shinfo(skb)->frags + frag_i);
		if (rc)
			goto mem_err;
	} else {
		rc = tso_get_head_fragment(&state, efx, skb);
		if (rc)
			goto mem_err;
		frag_i = -1;
	}

	if (tso_start_new_packet(tx_queue, skb, &state) < 0)
		goto mem_err;

	while (1) {
		rc = tso_fill_packet_with_fragment(tx_queue, skb, &state);
		if (unlikely(rc))
			goto stop;

		/* Move onto the next fragment? */
		if (state.in_len == 0) {
			if (++frag_i >= skb_shinfo(skb)->nr_frags)
				/* End of payload reached. */
				break;
			rc = tso_get_fragment(&state, efx,
					      skb_shinfo(skb)->frags + frag_i);
			if (rc)
				goto mem_err;
		}

		/* Start at new packet? */
		if (state.packet_space == 0 &&
		    tso_start_new_packet(tx_queue, skb, &state) < 0)
			goto mem_err;
	}

	/* Pass off to hardware */
	efx_nic_push_buffers(tx_queue);

	tx_queue->tso_bursts++;
	return NETDEV_TX_OK;

 mem_err:
	netif_err(efx, tx_err, efx->net_dev,
		  "Out of memory for TSO headers, or PCI mapping error\n");
	dev_kfree_skb_any(skb);
	goto unwind;

 stop:
	rc2 = NETDEV_TX_BUSY;

	/* Stop the queue if it wasn't stopped before. */
	if (tx_queue->stopped == 1)
		efx_stop_queue(tx_queue->channel);

 unwind:
	/* Free the DMA mapping we were in the process of writing out */
	if (state.unmap_len) {
		if (state.unmap_single)
			pci_unmap_single(efx->pci_dev, state.unmap_addr,
					 state.unmap_len, PCI_DMA_TODEVICE);
		else
			pci_unmap_page(efx->pci_dev, state.unmap_addr,
				       state.unmap_len, PCI_DMA_TODEVICE);
	}

	efx_enqueue_unwind(tx_queue);
	return rc2;
}


/*
 * Free up all TSO data structures associated with tx_queue. This
 * routine should be called only once the tx_queue is both empty and
 * no longer in use.
 */
static void efx_fini_tso(struct efx_tx_queue *tx_queue)
{
	unsigned i;

	if (tx_queue->buffer) {
		for (i = 0; i <= EFX_TXQ_MASK; ++i)
			efx_tsoh_free(tx_queue, &tx_queue->buffer[i]);
	}

	while (tx_queue->tso_headers_free != NULL)
		efx_tsoh_block_free(tx_queue, tx_queue->tso_headers_free,
				    tx_queue->efx->pci_dev);
}
