1/*
2 * Copyright (c) 2005-2008 Chelsio, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *        copyright notice, this list of conditions and the following
16 *        disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer in the documentation and/or other materials
21 *        provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32#include <linux/skbuff.h>
33#include <linux/netdevice.h>
34#include <linux/etherdevice.h>
35#include <linux/if_vlan.h>
36#include <linux/ip.h>
37#include <linux/tcp.h>
38#include <linux/dma-mapping.h>
39#include <linux/slab.h>
40#include <net/arp.h>
41#include "common.h"
42#include "regs.h"
43#include "sge_defs.h"
44#include "t3_cpl.h"
45#include "firmware_exports.h"
46#include "cxgb3_offload.h"
47
48#define USE_GTS 0
49
50#define SGE_RX_SM_BUF_SIZE 1536
51
52#define SGE_RX_COPY_THRES  256
53#define SGE_RX_PULL_LEN    128
54
55#define SGE_PG_RSVD SMP_CACHE_BYTES
56/*
57 * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
58 * It must be a divisor of PAGE_SIZE.  If set to 0 FL0 will use sk_buffs
59 * directly.
60 */
61#define FL0_PG_CHUNK_SIZE  2048
62#define FL0_PG_ORDER 0
63#define FL0_PG_ALLOC_SIZE (PAGE_SIZE << FL0_PG_ORDER)
64#define FL1_PG_CHUNK_SIZE (PAGE_SIZE > 8192 ? 16384 : 8192)
65#define FL1_PG_ORDER (PAGE_SIZE > 8192 ? 0 : 1)
66#define FL1_PG_ALLOC_SIZE (PAGE_SIZE << FL1_PG_ORDER)
67
68#define SGE_RX_DROP_THRES 16
69#define RX_RECLAIM_PERIOD (HZ/4)
70
71/*
72 * Max number of Rx buffers we replenish at a time.
73 */
74#define MAX_RX_REFILL 16U
75/*
76 * Period of the Tx buffer reclaim timer.  This timer does not need to run
77 * frequently as Tx buffers are usually reclaimed by new Tx packets.
78 */
79#define TX_RECLAIM_PERIOD (HZ / 4)
80#define TX_RECLAIM_TIMER_CHUNK 64U
81#define TX_RECLAIM_CHUNK 16U
82
83/* WR size in bytes */
84#define WR_LEN (WR_FLITS * 8)
85
86/*
87 * Types of Tx queues in each queue set.  Order here matters, do not change.
88 */
89enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL };
90
91/* Values for sge_txq.flags */
92enum {
93	TXQ_RUNNING = 1 << 0,	/* fetch engine is running */
94	TXQ_LAST_PKT_DB = 1 << 1,	/* last packet rang the doorbell */
95};
96
97struct tx_desc {
98	__be64 flit[TX_DESC_FLITS];
99};
100
101struct rx_desc {
102	__be32 addr_lo;
103	__be32 len_gen;
104	__be32 gen2;
105	__be32 addr_hi;
106};
107
108struct tx_sw_desc {		/* SW state per Tx descriptor */
109	struct sk_buff *skb;
110	u8 eop;       /* set if last descriptor for packet */
111	u8 addr_idx;  /* buffer index of first SGL entry in descriptor */
112	u8 fragidx;   /* first page fragment associated with descriptor */
113	s8 sflit;     /* start flit of first SGL entry in descriptor */
114};
115
116struct rx_sw_desc {                /* SW state per Rx descriptor */
117	union {
118		struct sk_buff *skb;
119		struct fl_pg_chunk pg_chunk;
120	};
121	DEFINE_DMA_UNMAP_ADDR(dma_addr);
122};
123
124struct rsp_desc {		/* response queue descriptor */
125	struct rss_header rss_hdr;
126	__be32 flags;
127	__be32 len_cq;
128	u8 imm_data[47];
129	u8 intr_gen;
130};
131
132/*
133 * Holds unmapping information for Tx packets that need deferred unmapping.
134 * This structure lives at skb->head and must be allocated by callers.
135 */
136struct deferred_unmap_info {
137	struct pci_dev *pdev;
138	dma_addr_t addr[MAX_SKB_FRAGS + 1];
139};
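
/*
 * Usage note: write_ofld_wr() has setup_deferred_unmapping() record the
 * SGL's DMA addresses here and then sets skb->destructor to
 * deferred_unmap_destructor(), so the buffers stay mapped until the skb
 * itself is freed rather than until its Tx descriptors are reclaimed.
 */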
140
141/*
142 * Maps a number of flits to the number of Tx descriptors that can hold them.
143 * The formula is
144 *
145 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
146 *
147 * HW allows up to 4 descriptors to be combined into a WR.
148 */
149static u8 flit_desc_map[] = {
150	0,
151#if SGE_NUM_GENBITS == 1
152	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
153	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
154	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
155	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
156#elif SGE_NUM_GENBITS == 2
157	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
159	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
160	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
161#else
162# error "SGE_NUM_GENBITS must be 1 or 2"
163#endif
164};
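
/*
 * Worked example, inferred from the table above: with SGE_NUM_GENBITS == 2
 * the first descriptor holds up to 15 flits and each additional descriptor
 * holds 14 more (e.g. a 16-flit WR needs 2 descriptors); with a single
 * generation bit the first descriptor holds 16 flits.  The difference is
 * that with two generation bits the last flit of every descriptor carries
 * the generation value (see wr_gen2() below).
 */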
165
166static inline struct sge_qset *fl_to_qset(const struct sge_fl *q, int qidx)
167{
168	return container_of(q, struct sge_qset, fl[qidx]);
169}
170
171static inline struct sge_qset *rspq_to_qset(const struct sge_rspq *q)
172{
173	return container_of(q, struct sge_qset, rspq);
174}
175
176static inline struct sge_qset *txq_to_qset(const struct sge_txq *q, int qidx)
177{
178	return container_of(q, struct sge_qset, txq[qidx]);
179}
180
181/**
182 *	refill_rspq - replenish an SGE response queue
183 *	@adapter: the adapter
184 *	@q: the response queue to replenish
185 *	@credits: how many new responses to make available
186 *
187 *	Replenishes a response queue by making the supplied number of responses
188 *	available to HW.
189 */
190static inline void refill_rspq(struct adapter *adapter,
191			       const struct sge_rspq *q, unsigned int credits)
192{
193	rmb();
194	t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN,
195		     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
196}
197
198/**
199 *	need_skb_unmap - does the platform need unmapping of sk_buffs?
200 *
201 *	Returns true if the platform needs sk_buff unmapping.  The compiler
202 *	optimizes away the unmapping code when this returns false.
203 */
204static inline int need_skb_unmap(void)
205{
206#ifdef CONFIG_NEED_DMA_MAP_STATE
207	return 1;
208#else
209	return 0;
210#endif
211}
212
213/**
214 *	unmap_skb - unmap a packet main body and its page fragments
215 *	@skb: the packet
216 *	@q: the Tx queue containing Tx descriptors for the packet
217 *	@cidx: index of Tx descriptor
218 *	@pdev: the PCI device
219 *
220 *	Unmap the main body of an sk_buff and its page fragments, if any.
221 *	Because of the fairly complicated structure of our SGLs and the desire
222 *	to conserve space for metadata, the information necessary to unmap an
223 *	sk_buff is spread across the sk_buff itself (buffer lengths), the HW Tx
224 *	descriptors (the physical addresses of the various data buffers), and
225 *	the SW descriptor state (assorted indices).  The send functions
226 *	initialize the indices for the first packet descriptor so we can unmap
227 *	the buffers held in the first Tx descriptor here, and we have enough
228 *	information at this point to set the state for the next Tx descriptor.
229 *
230 *	Note that it is possible to clean up the first descriptor of a packet
231 *	before the send routines have written the next descriptors, but this
232 *	race does not cause any problem.  We just end up writing the unmapping
233 *	info for the descriptor first.
234 */
235static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
236			     unsigned int cidx, struct pci_dev *pdev)
237{
238	const struct sg_ent *sgp;
239	struct tx_sw_desc *d = &q->sdesc[cidx];
240	int nfrags, frag_idx, curflit, j = d->addr_idx;
241
242	sgp = (struct sg_ent *)&q->desc[cidx].flit[d->sflit];
243	frag_idx = d->fragidx;
244
245	if (frag_idx == 0 && skb_headlen(skb)) {
246		pci_unmap_single(pdev, be64_to_cpu(sgp->addr[0]),
247				 skb_headlen(skb), PCI_DMA_TODEVICE);
248		j = 1;
249	}
250
251	curflit = d->sflit + 1 + j;
252	nfrags = skb_shinfo(skb)->nr_frags;
253
254	while (frag_idx < nfrags && curflit < WR_FLITS) {
255		pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]),
256			       skb_shinfo(skb)->frags[frag_idx].size,
257			       PCI_DMA_TODEVICE);
258		j ^= 1;
259		if (j == 0) {
260			sgp++;
261			curflit++;
262		}
263		curflit++;
264		frag_idx++;
265	}
266
267	if (frag_idx < nfrags) {   /* SGL continues into next Tx descriptor */
268		d = cidx + 1 == q->size ? q->sdesc : d + 1;
269		d->fragidx = frag_idx;
270		d->addr_idx = j;
271		d->sflit = curflit - WR_FLITS - j; /* sflit can be -1 */
272	}
273}
274
275/**
276 *	free_tx_desc - reclaims Tx descriptors and their buffers
277 *	@adapter: the adapter
278 *	@q: the Tx queue to reclaim descriptors from
279 *	@n: the number of descriptors to reclaim
280 *
281 *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
282 *	Tx buffers.  Called with the Tx queue lock held.
283 */
284static void free_tx_desc(struct adapter *adapter, struct sge_txq *q,
285			 unsigned int n)
286{
287	struct tx_sw_desc *d;
288	struct pci_dev *pdev = adapter->pdev;
289	unsigned int cidx = q->cidx;
290
291	const int need_unmap = need_skb_unmap() &&
292			       q->cntxt_id >= FW_TUNNEL_SGEEC_START;
293
294	d = &q->sdesc[cidx];
295	while (n--) {
296		if (d->skb) {	/* an SGL is present */
297			if (need_unmap)
298				unmap_skb(d->skb, q, cidx, pdev);
299			if (d->eop)
300				kfree_skb(d->skb);
301		}
302		++d;
303		if (++cidx == q->size) {
304			cidx = 0;
305			d = q->sdesc;
306		}
307	}
308	q->cidx = cidx;
309}
310
311/**
312 *	reclaim_completed_tx - reclaims completed Tx descriptors
313 *	@adapter: the adapter
314 *	@q: the Tx queue to reclaim completed descriptors from
315 *	@chunk: maximum number of descriptors to reclaim
316 *
317 *	Reclaims Tx descriptors that the SGE has indicated it has processed,
318 *	and frees the associated buffers if possible.  Called with the Tx
319 *	queue's lock held.
320 */
321static inline unsigned int reclaim_completed_tx(struct adapter *adapter,
322						struct sge_txq *q,
323						unsigned int chunk)
324{
325	unsigned int reclaim = q->processed - q->cleaned;
326
327	reclaim = min(chunk, reclaim);
328	if (reclaim) {
329		free_tx_desc(adapter, q, reclaim);
330		q->cleaned += reclaim;
331		q->in_use -= reclaim;
332	}
333	return q->processed - q->cleaned;
334}
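
/*
 * The value returned above is the number of descriptors the SGE has
 * completed but that are still unreclaimed after this bounded pass, so a
 * caller can tell whether further reclaim work remains.
 */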
335
336/**
337 *	should_restart_tx - are there enough resources to restart a Tx queue?
338 *	@q: the Tx queue
339 *
340 *	Checks if there are enough descriptors to restart a suspended Tx queue.
341 */
342static inline int should_restart_tx(const struct sge_txq *q)
343{
344	unsigned int r = q->processed - q->cleaned;
345
346	return q->in_use - r < (q->size >> 1);
347}
348
349static void clear_rx_desc(struct pci_dev *pdev, const struct sge_fl *q,
350			  struct rx_sw_desc *d)
351{
352	if (q->use_pages && d->pg_chunk.page) {
353		(*d->pg_chunk.p_cnt)--;
354		if (!*d->pg_chunk.p_cnt)
355			pci_unmap_page(pdev,
356				       d->pg_chunk.mapping,
357				       q->alloc_size, PCI_DMA_FROMDEVICE);
358
359		put_page(d->pg_chunk.page);
360		d->pg_chunk.page = NULL;
361	} else {
362		pci_unmap_single(pdev, dma_unmap_addr(d, dma_addr),
363				 q->buf_size, PCI_DMA_FROMDEVICE);
364		kfree_skb(d->skb);
365		d->skb = NULL;
366	}
367}
368
369/**
370 *	free_rx_bufs - free the Rx buffers on an SGE free list
371 *	@pdev: the PCI device associated with the adapter
372 *	@rxq: the SGE free list to clean up
373 *
374 *	Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
375 *	this queue should be stopped before calling this function.
376 */
377static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
378{
379	unsigned int cidx = q->cidx;
380
381	while (q->credits--) {
382		struct rx_sw_desc *d = &q->sdesc[cidx];
383
385		clear_rx_desc(pdev, q, d);
386		if (++cidx == q->size)
387			cidx = 0;
388	}
389
390	if (q->pg_chunk.page) {
391		__free_pages(q->pg_chunk.page, q->order);
392		q->pg_chunk.page = NULL;
393	}
394}
395
396/**
397 *	add_one_rx_buf - add a packet buffer to a free-buffer list
398 *	@va:  buffer start VA
399 *	@len: the buffer length
400 *	@d: the HW Rx descriptor to write
401 *	@sd: the SW Rx descriptor to write
402 *	@gen: the generation bit value
403 *	@pdev: the PCI device associated with the adapter
404 *
405 *	Add a buffer of the given length to the supplied HW and SW Rx
406 *	descriptors.
407 */
408static inline int add_one_rx_buf(void *va, unsigned int len,
409				 struct rx_desc *d, struct rx_sw_desc *sd,
410				 unsigned int gen, struct pci_dev *pdev)
411{
412	dma_addr_t mapping;
413
414	mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE);
415	if (unlikely(pci_dma_mapping_error(pdev, mapping)))
416		return -ENOMEM;
417
418	dma_unmap_addr_set(sd, dma_addr, mapping);
419
420	d->addr_lo = cpu_to_be32(mapping);
421	d->addr_hi = cpu_to_be32((u64) mapping >> 32);
422	wmb();
423	d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
424	d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
425	return 0;
426}
427
428static inline int add_one_rx_chunk(dma_addr_t mapping, struct rx_desc *d,
429				   unsigned int gen)
430{
431	d->addr_lo = cpu_to_be32(mapping);
432	d->addr_hi = cpu_to_be32((u64) mapping >> 32);
433	wmb();
434	d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
435	d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
436	return 0;
437}
438
439static int alloc_pg_chunk(struct adapter *adapter, struct sge_fl *q,
440			  struct rx_sw_desc *sd, gfp_t gfp,
441			  unsigned int order)
442{
443	if (!q->pg_chunk.page) {
444		dma_addr_t mapping;
445
446		q->pg_chunk.page = alloc_pages(gfp, order);
447		if (unlikely(!q->pg_chunk.page))
448			return -ENOMEM;
449		q->pg_chunk.va = page_address(q->pg_chunk.page);
450		q->pg_chunk.p_cnt = q->pg_chunk.va + (PAGE_SIZE << order) -
451				    SGE_PG_RSVD;
452		q->pg_chunk.offset = 0;
453		mapping = pci_map_page(adapter->pdev, q->pg_chunk.page,
454				       0, q->alloc_size, PCI_DMA_FROMDEVICE);
455		q->pg_chunk.mapping = mapping;
456	}
457	sd->pg_chunk = q->pg_chunk;
458
459	prefetch(sd->pg_chunk.p_cnt);
460
461	q->pg_chunk.offset += q->buf_size;
462	if (q->pg_chunk.offset == (PAGE_SIZE << order))
463		q->pg_chunk.page = NULL;
464	else {
465		q->pg_chunk.va += q->buf_size;
466		get_page(q->pg_chunk.page);
467	}
468
469	if (sd->pg_chunk.offset == 0)
470		*sd->pg_chunk.p_cnt = 1;
471	else
472		*sd->pg_chunk.p_cnt += 1;
473
474	return 0;
475}
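
/*
 * Note on the page-chunk reference count: alloc_pg_chunk() keeps p_cnt in
 * the last SGE_PG_RSVD bytes of the page, which is why refill_fl() syncs
 * only buf_size - SGE_PG_RSVD bytes for the device.  The count tracks the
 * chunks carved from the page that are still outstanding, so the PCI
 * mapping is torn down only when the last of them is consumed or freed
 * (see clear_rx_desc() and get_packet_pg()).
 */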
476
477static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
478{
479	if (q->pend_cred >= q->credits / 4) {
480		q->pend_cred = 0;
481		wmb();
482		t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
483	}
484}
485
486/**
487 *	refill_fl - refill an SGE free-buffer list
488 *	@adapter: the adapter
489 *	@q: the free-list to refill
490 *	@n: the number of new buffers to allocate
491 *	@gfp: the gfp flags for allocating new buffers
492 *
493 *	(Re)populate an SGE free-buffer list with up to @n new packet buffers,
494 *	allocated with the supplied gfp flags.  The caller must ensure that
495 *	@n does not exceed the queue's capacity.
496 */
497static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
498{
499	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
500	struct rx_desc *d = &q->desc[q->pidx];
501	unsigned int count = 0;
502
503	while (n--) {
504		dma_addr_t mapping;
505		int err;
506
507		if (q->use_pages) {
508			if (unlikely(alloc_pg_chunk(adap, q, sd, gfp,
509						    q->order))) {
510nomem:				q->alloc_failed++;
511				break;
512			}
513			mapping = sd->pg_chunk.mapping + sd->pg_chunk.offset;
514			dma_unmap_addr_set(sd, dma_addr, mapping);
515
516			add_one_rx_chunk(mapping, d, q->gen);
517			pci_dma_sync_single_for_device(adap->pdev, mapping,
518						q->buf_size - SGE_PG_RSVD,
519						PCI_DMA_FROMDEVICE);
520		} else {
521			void *buf_start;
522
523			struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
524			if (!skb)
525				goto nomem;
526
527			sd->skb = skb;
528			buf_start = skb->data;
529			err = add_one_rx_buf(buf_start, q->buf_size, d, sd,
530					     q->gen, adap->pdev);
531			if (unlikely(err)) {
532				clear_rx_desc(adap->pdev, q, sd);
533				break;
534			}
535		}
536
537		d++;
538		sd++;
539		if (++q->pidx == q->size) {
540			q->pidx = 0;
541			q->gen ^= 1;
542			sd = q->sdesc;
543			d = q->desc;
544		}
545		count++;
546	}
547
548	q->credits += count;
549	q->pend_cred += count;
550	ring_fl_db(adap, q);
551
552	return count;
553}
554
555static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
556{
557	refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits),
558		  GFP_ATOMIC | __GFP_COMP);
559}
560
561/**
562 *	recycle_rx_buf - recycle a receive buffer
563 *	@adapter: the adapter
564 *	@q: the SGE free list
565 *	@idx: index of buffer to recycle
566 *
567 *	Recycles the specified buffer on the given free list by adding it at
568 *	the next available slot on the list.
569 */
570static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q,
571			   unsigned int idx)
572{
573	struct rx_desc *from = &q->desc[idx];
574	struct rx_desc *to = &q->desc[q->pidx];
575
576	q->sdesc[q->pidx] = q->sdesc[idx];
577	to->addr_lo = from->addr_lo;	/* already big endian */
578	to->addr_hi = from->addr_hi;	/* likewise */
579	wmb();
580	to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen));
581	to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen));
582
583	if (++q->pidx == q->size) {
584		q->pidx = 0;
585		q->gen ^= 1;
586	}
587
588	q->credits++;
589	q->pend_cred++;
590	ring_fl_db(adap, q);
591}
592
593/**
594 *	alloc_ring - allocate resources for an SGE descriptor ring
595 *	@pdev: the PCI device
596 *	@nelem: the number of descriptors
597 *	@elem_size: the size of each descriptor
598 *	@sw_size: the size of the SW state associated with each ring element
599 *	@phys: the physical address of the allocated ring
600 *	@metadata: address of the array holding the SW state for the ring
601 *
602 *	Allocates resources for an SGE descriptor ring, such as Tx queues,
603 *	free buffer lists, or response queues.  Each SGE ring requires
604 *	space for its HW descriptors plus, optionally, space for the SW state
605 *	associated with each HW entry (the metadata).  The function returns
606 *	three values: the virtual address for the HW ring (the return value
607 *	of the function), the physical address of the HW ring, and the address
608 *	of the SW ring.
609 */
610static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
611			size_t sw_size, dma_addr_t * phys, void *metadata)
612{
613	size_t len = nelem * elem_size;
614	void *s = NULL;
615	void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);
616
617	if (!p)
618		return NULL;
619	if (sw_size && metadata) {
620		s = kcalloc(nelem, sw_size, GFP_KERNEL);
621
622		if (!s) {
623			dma_free_coherent(&pdev->dev, len, p, *phys);
624			return NULL;
625		}
626		*(void **)metadata = s;
627	}
628	memset(p, 0, len);
629	return p;
630}
631
632/**
633 *	t3_reset_qset - reset an SGE queue set
634 *	@q: the queue set
635 *
636 *	Reset the qset structure.  The NAPI structure is preserved in the
637 *	event of the qset's reincarnation, for example during EEH
638 *	recovery.
639 */
640static void t3_reset_qset(struct sge_qset *q)
641{
642	if (q->adap &&
643	    !(q->adap->flags & NAPI_INIT)) {
644		memset(q, 0, sizeof(*q));
645		return;
646	}
647
648	q->adap = NULL;
649	memset(&q->rspq, 0, sizeof(q->rspq));
650	memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET);
651	memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
652	q->txq_stopped = 0;
653	q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */
654	q->rx_reclaim_timer.function = NULL;
655	q->nomem = 0;
656	napi_free_frags(&q->napi);
657}
658
659
660/**
661 *	t3_free_qset - free the resources of an SGE queue set
662 *	@adapter: the adapter owning the queue set
663 *	@q: the queue set
664 *
665 *	Release the HW and SW resources associated with an SGE queue set, such
666 *	as HW contexts, packet buffers, and descriptor rings.  Traffic to the
667 *	queue set must be quiesced prior to calling this.
668 */
669static void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
670{
671	int i;
672	struct pci_dev *pdev = adapter->pdev;
673
674	for (i = 0; i < SGE_RXQ_PER_SET; ++i)
675		if (q->fl[i].desc) {
676			spin_lock_irq(&adapter->sge.reg_lock);
677			t3_sge_disable_fl(adapter, q->fl[i].cntxt_id);
678			spin_unlock_irq(&adapter->sge.reg_lock);
679			free_rx_bufs(pdev, &q->fl[i]);
680			kfree(q->fl[i].sdesc);
681			dma_free_coherent(&pdev->dev,
682					  q->fl[i].size *
683					  sizeof(struct rx_desc), q->fl[i].desc,
684					  q->fl[i].phys_addr);
685		}
686
687	for (i = 0; i < SGE_TXQ_PER_SET; ++i)
688		if (q->txq[i].desc) {
689			spin_lock_irq(&adapter->sge.reg_lock);
690			t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0);
691			spin_unlock_irq(&adapter->sge.reg_lock);
692			if (q->txq[i].sdesc) {
693				free_tx_desc(adapter, &q->txq[i],
694					     q->txq[i].in_use);
695				kfree(q->txq[i].sdesc);
696			}
697			dma_free_coherent(&pdev->dev,
698					  q->txq[i].size *
699					  sizeof(struct tx_desc),
700					  q->txq[i].desc, q->txq[i].phys_addr);
701			__skb_queue_purge(&q->txq[i].sendq);
702		}
703
704	if (q->rspq.desc) {
705		spin_lock_irq(&adapter->sge.reg_lock);
706		t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id);
707		spin_unlock_irq(&adapter->sge.reg_lock);
708		dma_free_coherent(&pdev->dev,
709				  q->rspq.size * sizeof(struct rsp_desc),
710				  q->rspq.desc, q->rspq.phys_addr);
711	}
712
713	t3_reset_qset(q);
714}
715
716/**
717 *	init_qset_cntxt - initialize an SGE queue set context info
718 *	@qs: the queue set
719 *	@id: the queue set id
720 *
721 *	Initializes the TIDs and context ids for the queues of a queue set.
722 */
723static void init_qset_cntxt(struct sge_qset *qs, unsigned int id)
724{
725	qs->rspq.cntxt_id = id;
726	qs->fl[0].cntxt_id = 2 * id;
727	qs->fl[1].cntxt_id = 2 * id + 1;
728	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
729	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
730	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
731	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
732	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
733}
734
735/**
736 *	sgl_len - calculates the size of an SGL of the given capacity
737 *	@n: the number of SGL entries
738 *
739 *	Calculates the number of flits needed for a scatter/gather list that
740 *	can hold the given number of entries.
741 */
742static inline unsigned int sgl_len(unsigned int n)
743{
744	/* alternatively: 3 * (n / 2) + 2 * (n & 1) */
745	return (3 * n) / 2 + (n & 1);
746}
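
/*
 * Example: as make_sgl() fills them, each struct sg_ent packs two SGL
 * entries into 3 flits (one flit for the two 32-bit lengths plus one
 * 64-bit address flit per entry), and a trailing odd entry takes 2 flits.
 * So n = 3 entries -> 3 + 2 = 5 flits = (3 * 3) / 2 + (3 & 1).
 */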
747
748/**
749 *	flits_to_desc - returns the num of Tx descriptors for the given flits
750 *	@n: the number of flits
751 *
752 *	Calculates the number of Tx descriptors needed for the supplied number
753 *	of flits.
754 */
755static inline unsigned int flits_to_desc(unsigned int n)
756{
757	BUG_ON(n >= ARRAY_SIZE(flit_desc_map));
758	return flit_desc_map[n];
759}
760
761/**
762 *	get_packet - return the next ingress packet buffer from a free list
763 *	@adap: the adapter that received the packet
764 *	@fl: the SGE free list holding the packet
765 *	@len: the packet length including any SGE padding
766 *	@drop_thres: # of remaining buffers before we start dropping packets
767 *
768 *	Get the next packet from a free list and complete setup of the
769 *	sk_buff.  If the packet is small we make a copy and recycle the
770 *	original buffer, otherwise we use the original buffer itself.  If a
771 *	positive drop threshold is supplied packets are dropped and their
772 *	buffers recycled if (a) the number of remaining buffers is under the
773 *	threshold and the packet is too big to copy, or (b) the packet should
774 *	be copied but there is no memory for the copy.
775 */
776static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
777				  unsigned int len, unsigned int drop_thres)
778{
779	struct sk_buff *skb = NULL;
780	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
781
782	prefetch(sd->skb->data);
783	fl->credits--;
784
785	if (len <= SGE_RX_COPY_THRES) {
786		skb = alloc_skb(len, GFP_ATOMIC);
787		if (likely(skb != NULL)) {
788			__skb_put(skb, len);
789			pci_dma_sync_single_for_cpu(adap->pdev,
790					    dma_unmap_addr(sd, dma_addr), len,
791					    PCI_DMA_FROMDEVICE);
792			memcpy(skb->data, sd->skb->data, len);
793			pci_dma_sync_single_for_device(adap->pdev,
794					    dma_unmap_addr(sd, dma_addr), len,
795					    PCI_DMA_FROMDEVICE);
796		} else if (!drop_thres)
797			goto use_orig_buf;
798recycle:
799		recycle_rx_buf(adap, fl, fl->cidx);
800		return skb;
801	}
802
803	if (unlikely(fl->credits < drop_thres) &&
804	    refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits - 1),
805		      GFP_ATOMIC | __GFP_COMP) == 0)
806		goto recycle;
807
808use_orig_buf:
809	pci_unmap_single(adap->pdev, dma_unmap_addr(sd, dma_addr),
810			 fl->buf_size, PCI_DMA_FROMDEVICE);
811	skb = sd->skb;
812	skb_put(skb, len);
813	__refill_fl(adap, fl);
814	return skb;
815}
816
817/**
818 *	get_packet_pg - return the next ingress packet buffer from a free list
819 *	@adap: the adapter that received the packet
820 *	@fl: the SGE free list holding the packet
821 *	@len: the packet length including any SGE padding
822 *	@drop_thres: # of remaining buffers before we start dropping packets
823 *
824 *	Get the next packet from a free list populated with page chunks.
825 *	If the packet is small we make a copy and recycle the original buffer,
826 *	otherwise we attach the original buffer as a page fragment to a fresh
827 *	sk_buff.  If a positive drop threshold is supplied packets are dropped
828 *	and their buffers recycled if (a) the number of remaining buffers is
829 *	under the threshold and the packet is too big to copy, or (b) there's
830 *	no system memory.
831 *
832 * 	Note: this function is similar to @get_packet but deals with Rx buffers
833 * 	that are page chunks rather than sk_buffs.
834 */
835static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
836				     struct sge_rspq *q, unsigned int len,
837				     unsigned int drop_thres)
838{
839	struct sk_buff *newskb, *skb;
840	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
841
842	dma_addr_t dma_addr = dma_unmap_addr(sd, dma_addr);
843
844	newskb = skb = q->pg_skb;
845	if (!skb && (len <= SGE_RX_COPY_THRES)) {
846		newskb = alloc_skb(len, GFP_ATOMIC);
847		if (likely(newskb != NULL)) {
848			__skb_put(newskb, len);
849			pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len,
850					    PCI_DMA_FROMDEVICE);
851			memcpy(newskb->data, sd->pg_chunk.va, len);
852			pci_dma_sync_single_for_device(adap->pdev, dma_addr,
853						       len,
854						       PCI_DMA_FROMDEVICE);
855		} else if (!drop_thres)
856			return NULL;
857recycle:
858		fl->credits--;
859		recycle_rx_buf(adap, fl, fl->cidx);
860		q->rx_recycle_buf++;
861		return newskb;
862	}
863
864	if (unlikely(q->rx_recycle_buf || (!skb && fl->credits <= drop_thres)))
865		goto recycle;
866
867	prefetch(sd->pg_chunk.p_cnt);
868
869	if (!skb)
870		newskb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
871
872	if (unlikely(!newskb)) {
873		if (!drop_thres)
874			return NULL;
875		goto recycle;
876	}
877
878	pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len,
879				    PCI_DMA_FROMDEVICE);
880	(*sd->pg_chunk.p_cnt)--;
881	if (!*sd->pg_chunk.p_cnt && sd->pg_chunk.page != fl->pg_chunk.page)
882		pci_unmap_page(adap->pdev,
883			       sd->pg_chunk.mapping,
884			       fl->alloc_size,
885			       PCI_DMA_FROMDEVICE);
886	if (!skb) {
887		__skb_put(newskb, SGE_RX_PULL_LEN);
888		memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
889		skb_fill_page_desc(newskb, 0, sd->pg_chunk.page,
890				   sd->pg_chunk.offset + SGE_RX_PULL_LEN,
891				   len - SGE_RX_PULL_LEN);
892		newskb->len = len;
893		newskb->data_len = len - SGE_RX_PULL_LEN;
894		newskb->truesize += newskb->data_len;
895	} else {
896		skb_fill_page_desc(newskb, skb_shinfo(newskb)->nr_frags,
897				   sd->pg_chunk.page,
898				   sd->pg_chunk.offset, len);
899		newskb->len += len;
900		newskb->data_len += len;
901		newskb->truesize += len;
902	}
903
904	fl->credits--;
905	/*
906	 * We do not refill FLs here, we let the caller do it to overlap a
907	 * prefetch.
908	 */
909	return newskb;
910}
911
912/**
913 *	get_imm_packet - return the next ingress packet buffer from a response
914 *	@resp: the response descriptor containing the packet data
915 *
916 *	Return a packet containing the immediate data of the given response.
917 */
918static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
919{
920	struct sk_buff *skb = alloc_skb(IMMED_PKT_SIZE, GFP_ATOMIC);
921
922	if (skb) {
923		__skb_put(skb, IMMED_PKT_SIZE);
924		skb_copy_to_linear_data(skb, resp->imm_data, IMMED_PKT_SIZE);
925	}
926	return skb;
927}
928
929/**
930 *	calc_tx_descs - calculate the number of Tx descriptors for a packet
931 *	@skb: the packet
932 *
933 * 	Returns the number of Tx descriptors needed for the given Ethernet
934 * 	packet.  Ethernet packets require addition of WR and CPL headers.
935 */
936static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
937{
938	unsigned int flits;
939
940	if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
941		return 1;
942
943	flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
944	if (skb_shinfo(skb)->gso_size)
945		flits++;
946	return flits_to_desc(flits);
947}
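
/*
 * Example: a non-GSO packet too large for immediate data, with linear data
 * and two page fragments, uses 3 SGL entries, i.e. sgl_len(3) = 5 flits,
 * plus 2 flits for the WR header and CPL_TX_PKT, so flits_to_desc(7) = 1
 * descriptor.  GSO packets need one extra flit for the LSO information,
 * matching write_tx_pkt_wr() below.
 */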
948
949/**
950 *	make_sgl - populate a scatter/gather list for a packet
951 *	@skb: the packet
952 *	@sgp: the SGL to populate
953 *	@start: start address of skb main body data to include in the SGL
954 *	@len: length of skb main body data to include in the SGL
955 *	@pdev: the PCI device
956 *
957 *	Generates a scatter/gather list for the buffers that make up a packet
958 *	and returns the SGL size in 8-byte words.  The caller must size the SGL
959 *	appropriately.
960 */
961static inline unsigned int make_sgl(const struct sk_buff *skb,
962				    struct sg_ent *sgp, unsigned char *start,
963				    unsigned int len, struct pci_dev *pdev)
964{
965	dma_addr_t mapping;
966	unsigned int i, j = 0, nfrags;
967
968	if (len) {
969		mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE);
970		sgp->len[0] = cpu_to_be32(len);
971		sgp->addr[0] = cpu_to_be64(mapping);
972		j = 1;
973	}
974
975	nfrags = skb_shinfo(skb)->nr_frags;
976	for (i = 0; i < nfrags; i++) {
977		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
978
979		mapping = pci_map_page(pdev, frag->page, frag->page_offset,
980				       frag->size, PCI_DMA_TODEVICE);
981		sgp->len[j] = cpu_to_be32(frag->size);
982		sgp->addr[j] = cpu_to_be64(mapping);
983		j ^= 1;
984		if (j == 0)
985			++sgp;
986	}
987	if (j)
988		sgp->len[j] = 0;
989	return ((nfrags + (len != 0)) * 3) / 2 + j;
990}
991
992/**
993 *	check_ring_tx_db - check and potentially ring a Tx queue's doorbell
994 *	@adap: the adapter
995 *	@q: the Tx queue
996 *
997 *	Ring the doorbell if a Tx queue is asleep.  There is a natural race
998 *	where the HW may go to sleep just after we check; in that case the
999 *	interrupt handler will detect the outstanding TX packet and ring the
1000 *	doorbell for us.
1001 *
1002 *	When GTS is disabled we unconditionally ring the doorbell.
1003 */
1004static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q)
1005{
1006#if USE_GTS
1007	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1008	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1009		set_bit(TXQ_LAST_PKT_DB, &q->flags);
1010		t3_write_reg(adap, A_SG_KDOORBELL,
1011			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1012	}
1013#else
1014	wmb();			/* write descriptors before telling HW */
1015	t3_write_reg(adap, A_SG_KDOORBELL,
1016		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1017#endif
1018}
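
/*
 * With USE_GTS defined as 0 above, only the #else branch is compiled in:
 * descriptors are made visible with wmb() and the doorbell is rung on every
 * call.  The GTS variant instead uses the TXQ_RUNNING/TXQ_LAST_PKT_DB flags
 * to skip the doorbell while the fetch engine is already running.
 */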
1019
1020static inline void wr_gen2(struct tx_desc *d, unsigned int gen)
1021{
1022#if SGE_NUM_GENBITS == 2
1023	d->flit[TX_DESC_FLITS - 1] = cpu_to_be64(gen);
1024#endif
1025}
1026
1027/**
1028 *	write_wr_hdr_sgl - write a WR header and, optionally, SGL
1029 *	@ndesc: number of Tx descriptors spanned by the SGL
1030 *	@skb: the packet corresponding to the WR
1031 *	@d: first Tx descriptor to be written
1032 *	@pidx: index of above descriptors
1033 *	@q: the SGE Tx queue
1034 *	@sgl: the SGL
1035 *	@flits: number of flits to the start of the SGL in the first descriptor
1036 *	@sgl_flits: the SGL size in flits
1037 *	@gen: the Tx descriptor generation
1038 *	@wr_hi: top 32 bits of WR header based on WR type (big endian)
1039 *	@wr_lo: low 32 bits of WR header based on WR type (big endian)
1040 *
1041 *	Write a work request header and an associated SGL.  If the SGL is
1042 *	small enough to fit into one Tx descriptor it has already been written
1043 *	and we just need to write the WR header.  Otherwise we distribute the
1044 *	SGL across the number of descriptors it spans.
1045 */
1046static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
1047			     struct tx_desc *d, unsigned int pidx,
1048			     const struct sge_txq *q,
1049			     const struct sg_ent *sgl,
1050			     unsigned int flits, unsigned int sgl_flits,
1051			     unsigned int gen, __be32 wr_hi,
1052			     __be32 wr_lo)
1053{
1054	struct work_request_hdr *wrp = (struct work_request_hdr *)d;
1055	struct tx_sw_desc *sd = &q->sdesc[pidx];
1056
1057	sd->skb = skb;
1058	if (need_skb_unmap()) {
1059		sd->fragidx = 0;
1060		sd->addr_idx = 0;
1061		sd->sflit = flits;
1062	}
1063
1064	if (likely(ndesc == 1)) {
1065		sd->eop = 1;
1066		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1067				   V_WR_SGLSFLT(flits)) | wr_hi;
1068		wmb();
1069		wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1070				   V_WR_GEN(gen)) | wr_lo;
1071		wr_gen2(d, gen);
1072	} else {
1073		unsigned int ogen = gen;
1074		const u64 *fp = (const u64 *)sgl;
1075		struct work_request_hdr *wp = wrp;
1076
1077		wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1078				   V_WR_SGLSFLT(flits)) | wr_hi;
1079
1080		while (sgl_flits) {
1081			unsigned int avail = WR_FLITS - flits;
1082
1083			if (avail > sgl_flits)
1084				avail = sgl_flits;
1085			memcpy(&d->flit[flits], fp, avail * sizeof(*fp));
1086			sgl_flits -= avail;
1087			ndesc--;
1088			if (!sgl_flits)
1089				break;
1090
1091			fp += avail;
1092			d++;
1093			sd->eop = 0;
1094			sd++;
1095			if (++pidx == q->size) {
1096				pidx = 0;
1097				gen ^= 1;
1098				d = q->desc;
1099				sd = q->sdesc;
1100			}
1101
1102			sd->skb = skb;
1103			wrp = (struct work_request_hdr *)d;
1104			wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1105					   V_WR_SGLSFLT(1)) | wr_hi;
1106			wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1107							sgl_flits + 1)) |
1108					   V_WR_GEN(gen)) | wr_lo;
1109			wr_gen2(d, gen);
1110			flits = 1;
1111		}
1112		sd->eop = 1;
1113		wrp->wr_hi |= htonl(F_WR_EOP);
1114		wmb();
1115		wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1116		wr_gen2((struct tx_desc *)wp, ogen);
1117		WARN_ON(ndesc != 0);
1118	}
1119}
1120
1121/**
1122 *	write_tx_pkt_wr - write a TX_PKT work request
1123 *	@adap: the adapter
1124 *	@skb: the packet to send
1125 *	@pi: the egress interface
1126 *	@pidx: index of the first Tx descriptor to write
1127 *	@gen: the generation value to use
1128 *	@q: the Tx queue
1129 *	@ndesc: number of descriptors the packet will occupy
1130 *	@compl: the value of the COMPL bit to use
1131 *
1132 *	Generate a TX_PKT work request to send the supplied packet.
1133 */
1134static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
1135			    const struct port_info *pi,
1136			    unsigned int pidx, unsigned int gen,
1137			    struct sge_txq *q, unsigned int ndesc,
1138			    unsigned int compl)
1139{
1140	unsigned int flits, sgl_flits, cntrl, tso_info;
1141	struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1142	struct tx_desc *d = &q->desc[pidx];
1143	struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)d;
1144
1145	cpl->len = htonl(skb->len);
1146	cntrl = V_TXPKT_INTF(pi->port_id);
1147
1148	if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1149		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(vlan_tx_tag_get(skb));
1150
1151	tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size);
1152	if (tso_info) {
1153		int eth_type;
1154		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;
1155
1156		d->flit[2] = 0;
1157		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1158		hdr->cntrl = htonl(cntrl);
1159		eth_type = skb_network_offset(skb) == ETH_HLEN ?
1160		    CPL_ETH_II : CPL_ETH_II_VLAN;
1161		tso_info |= V_LSO_ETH_TYPE(eth_type) |
1162		    V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
1163		    V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
1164		hdr->lso_info = htonl(tso_info);
1165		flits = 3;
1166	} else {
1167		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1168		cntrl |= F_TXPKT_IPCSUM_DIS;	/* SW calculates IP csum */
1169		cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL);
1170		cpl->cntrl = htonl(cntrl);
1171
1172		if (skb->len <= WR_LEN - sizeof(*cpl)) {
1173			q->sdesc[pidx].skb = NULL;
1174			if (!skb->data_len)
1175				skb_copy_from_linear_data(skb, &d->flit[2],
1176							  skb->len);
1177			else
1178				skb_copy_bits(skb, 0, &d->flit[2], skb->len);
1179
1180			flits = (skb->len + 7) / 8 + 2;
1181			cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) |
1182					      V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT)
1183					      | F_WR_SOP | F_WR_EOP | compl);
1184			wmb();
1185			cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) |
1186					      V_WR_TID(q->token));
1187			wr_gen2(d, gen);
1188			kfree_skb(skb);
1189			return;
1190		}
1191
1192		flits = 2;
1193	}
1194
1195	sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1196	sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev);
1197
1198	write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
1199			 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
1200			 htonl(V_WR_TID(q->token)));
1201}
1202
1203static inline void t3_stop_tx_queue(struct netdev_queue *txq,
1204				    struct sge_qset *qs, struct sge_txq *q)
1205{
1206	netif_tx_stop_queue(txq);
1207	set_bit(TXQ_ETH, &qs->txq_stopped);
1208	q->stops++;
1209}
1210
1211/**
1212 *	t3_eth_xmit - add a packet to the Ethernet Tx queue
1213 *	@skb: the packet
1214 *	@dev: the egress net device
1215 *
1216 *	Add a packet to an SGE Tx queue.  Runs with softirqs disabled.
1217 */
1218netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
1219{
1220	int qidx;
1221	unsigned int ndesc, pidx, credits, gen, compl;
1222	const struct port_info *pi = netdev_priv(dev);
1223	struct adapter *adap = pi->adapter;
1224	struct netdev_queue *txq;
1225	struct sge_qset *qs;
1226	struct sge_txq *q;
1227
1228	/*
1229	 * The chip min packet length is 9 octets but play safe and reject
1230	 * anything shorter than an Ethernet header.
1231	 */
1232	if (unlikely(skb->len < ETH_HLEN)) {
1233		dev_kfree_skb(skb);
1234		return NETDEV_TX_OK;
1235	}
1236
1237	qidx = skb_get_queue_mapping(skb);
1238	qs = &pi->qs[qidx];
1239	q = &qs->txq[TXQ_ETH];
1240	txq = netdev_get_tx_queue(dev, qidx);
1241
1242	reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
1243
1244	credits = q->size - q->in_use;
1245	ndesc = calc_tx_descs(skb);
1246
1247	if (unlikely(credits < ndesc)) {
1248		t3_stop_tx_queue(txq, qs, q);
1249		dev_err(&adap->pdev->dev,
1250			"%s: Tx ring %u full while queue awake!\n",
1251			dev->name, q->cntxt_id & 7);
1252		return NETDEV_TX_BUSY;
1253	}
1254
1255	q->in_use += ndesc;
1256	if (unlikely(credits - ndesc < q->stop_thres)) {
1257		t3_stop_tx_queue(txq, qs, q);
1258
1259		if (should_restart_tx(q) &&
1260		    test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1261			q->restarts++;
1262			netif_tx_start_queue(txq);
1263		}
1264	}
1265
1266	gen = q->gen;
1267	q->unacked += ndesc;
1268	compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1269	q->unacked &= 7;
1270	pidx = q->pidx;
1271	q->pidx += ndesc;
1272	if (q->pidx >= q->size) {
1273		q->pidx -= q->size;
1274		q->gen ^= 1;
1275	}
1276
1277	/* update port statistics */
1278	if (skb->ip_summed == CHECKSUM_COMPLETE)
1279		qs->port_stats[SGE_PSTAT_TX_CSUM]++;
1280	if (skb_shinfo(skb)->gso_size)
1281		qs->port_stats[SGE_PSTAT_TSO]++;
1282	if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1283		qs->port_stats[SGE_PSTAT_VLANINS]++;
1284
1285	/*
1286	 * We do not use Tx completion interrupts to free DMAd Tx packets.
1287	 * This is good for performance but means that we rely on new Tx
1288	 * packets arriving to run the destructors of completed packets,
1289	 * which open up space in their sockets' send queues.  Sometimes
1290	 * we do not get such new packets, causing Tx to stall.  A single
1291	 * UDP transmitter is a good example of this situation.  We have
1292	 * a clean up timer that periodically reclaims completed packets
1293	 * but it doesn't run often enough (nor do we want it to) to prevent
1294	 * lengthy stalls.  A solution to this problem is to run the
1295	 * destructor early, after the packet is queued but before it's DMAd.
1296	 * A drawback is that we lie to socket memory accounting, but the amount
1297	 * of extra memory is reasonable (limited by the number of Tx
1298	 * descriptors), the packets do actually get freed quickly by new
1299	 * packets almost always, and for protocols like TCP that wait for
1300	 * acks to really free up the data the extra memory is even less.
1301	 * On the positive side we run the destructors on the sending CPU
1302	 * rather than on a potentially different completing CPU, usually a
1303	 * good thing.  We also run them without holding our Tx queue lock,
1304	 * unlike what reclaim_completed_tx() would otherwise do.
1305	 *
1306	 * Run the destructor before telling the DMA engine about the packet
1307	 * to make sure it doesn't complete and get freed prematurely.
1308	 */
1309	if (likely(!skb_shared(skb)))
1310		skb_orphan(skb);
1311
1312	write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl);
1313	check_ring_tx_db(adap, q);
1314	return NETDEV_TX_OK;
1315}
1316
1317/**
1318 *	write_imm - write a packet into a Tx descriptor as immediate data
1319 *	@d: the Tx descriptor to write
1320 *	@skb: the packet
1321 *	@len: the length of packet data to write as immediate data
1322 *	@gen: the generation bit value to write
1323 *
1324 *	Writes a packet as immediate data into a Tx descriptor.  The packet
1325 *	contains a work request at its beginning.  We must write the packet
1326 *	carefully so the SGE doesn't read it accidentally before it's written
1327 *	in its entirety.
1328 */
1329static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
1330			     unsigned int len, unsigned int gen)
1331{
1332	struct work_request_hdr *from = (struct work_request_hdr *)skb->data;
1333	struct work_request_hdr *to = (struct work_request_hdr *)d;
1334
1335	if (likely(!skb->data_len))
1336		memcpy(&to[1], &from[1], len - sizeof(*from));
1337	else
1338		skb_copy_bits(skb, sizeof(*from), &to[1], len - sizeof(*from));
1339
1340	to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1341					V_WR_BCNTLFLT(len & 7));
1342	wmb();
1343	to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1344					V_WR_LEN((len + 7) / 8));
1345	wr_gen2(d, gen);
1346	kfree_skb(skb);
1347}
1348
1349/**
1350 *	check_desc_avail - check descriptor availability on a send queue
1351 *	@adap: the adapter
1352 *	@q: the send queue
1353 *	@skb: the packet needing the descriptors
1354 *	@ndesc: the number of Tx descriptors needed
1355 *	@qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1356 *
1357 *	Checks if the requested number of Tx descriptors is available on an
1358 *	SGE send queue.  If the queue is already suspended or not enough
1359 *	descriptors are available the packet is queued for later transmission.
1360 *	Must be called with the Tx queue locked.
1361 *
1362 *	Returns 0 if enough descriptors are available, 1 if there aren't
1363 *	enough descriptors and the packet has been queued, and 2 if the caller
1364 *	needs to retry because there weren't enough descriptors at the
1365 *	beginning of the call but some freed up in the meantime.
1366 */
1367static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q,
1368				   struct sk_buff *skb, unsigned int ndesc,
1369				   unsigned int qid)
1370{
1371	if (unlikely(!skb_queue_empty(&q->sendq))) {
1372	      addq_exit:__skb_queue_tail(&q->sendq, skb);
1373		return 1;
1374	}
1375	if (unlikely(q->size - q->in_use < ndesc)) {
1376		struct sge_qset *qs = txq_to_qset(q, qid);
1377
1378		set_bit(qid, &qs->txq_stopped);
1379		smp_mb__after_clear_bit();
1380
1381		if (should_restart_tx(q) &&
1382		    test_and_clear_bit(qid, &qs->txq_stopped))
1383			return 2;
1384
1385		q->stops++;
1386		goto addq_exit;
1387	}
1388	return 0;
1389}
1390
1391/**
1392 *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1393 *	@q: the SGE control Tx queue
1394 *
1395 *	This is a variant of reclaim_completed_tx() that is used for Tx queues
1396 *	that send only immediate data (presently just the control queues) and
1397 *	thus do not have any sk_buffs to release.
1398 */
1399static inline void reclaim_completed_tx_imm(struct sge_txq *q)
1400{
1401	unsigned int reclaim = q->processed - q->cleaned;
1402
1403	q->in_use -= reclaim;
1404	q->cleaned += reclaim;
1405}
1406
1407static inline int immediate(const struct sk_buff *skb)
1408{
1409	return skb->len <= WR_LEN;
1410}
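
/*
 * "Immediate" means the whole packet, including the work request header it
 * already carries, fits within WR_LEN (WR_FLITS * 8) bytes and can be
 * copied straight into a single Tx descriptor by write_imm() instead of
 * being described by an SGL.
 */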
1411
1412/**
1413 *	ctrl_xmit - send a packet through an SGE control Tx queue
1414 *	@adap: the adapter
1415 *	@q: the control queue
1416 *	@skb: the packet
1417 *
1418 *	Send a packet through an SGE control Tx queue.  Packets sent through
1419 *	a control queue must fit entirely as immediate data in a single Tx
1420 *	descriptor and have no page fragments.
1421 */
1422static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
1423		     struct sk_buff *skb)
1424{
1425	int ret;
1426	struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data;
1427
1428	if (unlikely(!immediate(skb))) {
1429		WARN_ON(1);
1430		dev_kfree_skb(skb);
1431		return NET_XMIT_SUCCESS;
1432	}
1433
1434	wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1435	wrp->wr_lo = htonl(V_WR_TID(q->token));
1436
1437	spin_lock(&q->lock);
1438      again:reclaim_completed_tx_imm(q);
1439
1440	ret = check_desc_avail(adap, q, skb, 1, TXQ_CTRL);
1441	if (unlikely(ret)) {
1442		if (ret == 1) {
1443			spin_unlock(&q->lock);
1444			return NET_XMIT_CN;
1445		}
1446		goto again;
1447	}
1448
1449	write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1450
1451	q->in_use++;
1452	if (++q->pidx >= q->size) {
1453		q->pidx = 0;
1454		q->gen ^= 1;
1455	}
1456	spin_unlock(&q->lock);
1457	wmb();
1458	t3_write_reg(adap, A_SG_KDOORBELL,
1459		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1460	return NET_XMIT_SUCCESS;
1461}
1462
1463/**
1464 *	restart_ctrlq - restart a suspended control queue
1465 *	@qs: the queue set containing the control queue
1466 *
1467 *	Resumes transmission on a suspended Tx control queue.
1468 */
1469static void restart_ctrlq(unsigned long data)
1470{
1471	struct sk_buff *skb;
1472	struct sge_qset *qs = (struct sge_qset *)data;
1473	struct sge_txq *q = &qs->txq[TXQ_CTRL];
1474
1475	spin_lock(&q->lock);
1476      again:reclaim_completed_tx_imm(q);
1477
1478	while (q->in_use < q->size &&
1479	       (skb = __skb_dequeue(&q->sendq)) != NULL) {
1480
1481		write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1482
1483		if (++q->pidx >= q->size) {
1484			q->pidx = 0;
1485			q->gen ^= 1;
1486		}
1487		q->in_use++;
1488	}
1489
1490	if (!skb_queue_empty(&q->sendq)) {
1491		set_bit(TXQ_CTRL, &qs->txq_stopped);
1492		smp_mb__after_clear_bit();
1493
1494		if (should_restart_tx(q) &&
1495		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1496			goto again;
1497		q->stops++;
1498	}
1499
1500	spin_unlock(&q->lock);
1501	wmb();
1502	t3_write_reg(qs->adap, A_SG_KDOORBELL,
1503		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1504}
1505
1506/*
1507 * Send a management message through control queue 0
1508 */
1509int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
1510{
1511	int ret;
1512	local_bh_disable();
1513	ret = ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb);
1514	local_bh_enable();
1515
1516	return ret;
1517}
1518
1519/**
1520 *	deferred_unmap_destructor - unmap a packet when it is freed
1521 *	@skb: the packet
1522 *
1523 *	This is the packet destructor used for Tx packets that need to remain
1524 *	mapped until they are freed rather than until their Tx descriptors are
1525 *	freed.
1526 */
1527static void deferred_unmap_destructor(struct sk_buff *skb)
1528{
1529	int i;
1530	const dma_addr_t *p;
1531	const struct skb_shared_info *si;
1532	const struct deferred_unmap_info *dui;
1533
1534	dui = (struct deferred_unmap_info *)skb->head;
1535	p = dui->addr;
1536
1537	if (skb->tail - skb->transport_header)
1538		pci_unmap_single(dui->pdev, *p++,
1539				 skb->tail - skb->transport_header,
1540				 PCI_DMA_TODEVICE);
1541
1542	si = skb_shinfo(skb);
1543	for (i = 0; i < si->nr_frags; i++)
1544		pci_unmap_page(dui->pdev, *p++, si->frags[i].size,
1545			       PCI_DMA_TODEVICE);
1546}
1547
1548static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev,
1549				     const struct sg_ent *sgl, int sgl_flits)
1550{
1551	dma_addr_t *p;
1552	struct deferred_unmap_info *dui;
1553
1554	dui = (struct deferred_unmap_info *)skb->head;
1555	dui->pdev = pdev;
1556	for (p = dui->addr; sgl_flits >= 3; sgl++, sgl_flits -= 3) {
1557		*p++ = be64_to_cpu(sgl->addr[0]);
1558		*p++ = be64_to_cpu(sgl->addr[1]);
1559	}
1560	if (sgl_flits)
1561		*p = be64_to_cpu(sgl->addr[0]);
1562}
1563
1564/**
1565 *	write_ofld_wr - write an offload work request
1566 *	@adap: the adapter
1567 *	@skb: the packet to send
1568 *	@q: the Tx queue
1569 *	@pidx: index of the first Tx descriptor to write
1570 *	@gen: the generation value to use
1571 *	@ndesc: number of descriptors the packet will occupy
1572 *
1573 *	Write an offload work request to send the supplied packet.  The packet
1574 *	data already carry the work request with most fields populated.
1575 */
1576static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
1577			  struct sge_txq *q, unsigned int pidx,
1578			  unsigned int gen, unsigned int ndesc)
1579{
1580	unsigned int sgl_flits, flits;
1581	struct work_request_hdr *from;
1582	struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1583	struct tx_desc *d = &q->desc[pidx];
1584
1585	if (immediate(skb)) {
1586		q->sdesc[pidx].skb = NULL;
1587		write_imm(d, skb, skb->len, gen);
1588		return;
1589	}
1590
1591	/* Only TX_DATA builds SGLs */
1592
1593	from = (struct work_request_hdr *)skb->data;
1594	memcpy(&d->flit[1], &from[1],
1595	       skb_transport_offset(skb) - sizeof(*from));
1596
1597	flits = skb_transport_offset(skb) / 8;
1598	sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1599	sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
1600			     skb->tail - skb->transport_header,
1601			     adap->pdev);
1602	if (need_skb_unmap()) {
1603		setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
1604		skb->destructor = deferred_unmap_destructor;
1605	}
1606
1607	write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
1608			 gen, from->wr_hi, from->wr_lo);
1609}
1610
1611/**
1612 *	calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1613 *	@skb: the packet
1614 *
1615 * 	Returns the number of Tx descriptors needed for the given offload
1616 * 	packet.  These packets are already fully constructed.
1617 */
1618static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
1619{
1620	unsigned int flits, cnt;
1621
1622	if (skb->len <= WR_LEN)
1623		return 1;	/* packet fits as immediate data */
1624
1625	flits = skb_transport_offset(skb) / 8;	/* headers */
1626	cnt = skb_shinfo(skb)->nr_frags;
1627	if (skb->tail != skb->transport_header)
1628		cnt++;
1629	return flits_to_desc(flits + sgl_len(cnt));
1630}
1631
1632/**
1633 *	ofld_xmit - send a packet through an offload queue
1634 *	@adap: the adapter
1635 *	@q: the Tx offload queue
1636 *	@skb: the packet
1637 *
1638 *	Send an offload packet through an SGE offload queue.
1639 */
1640static int ofld_xmit(struct adapter *adap, struct sge_txq *q,
1641		     struct sk_buff *skb)
1642{
1643	int ret;
1644	unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen;
1645
1646	spin_lock(&q->lock);
1647again:	reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
1648
1649	ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
1650	if (unlikely(ret)) {
1651		if (ret == 1) {
1652			skb->priority = ndesc;	/* save for restart */
1653			spin_unlock(&q->lock);
1654			return NET_XMIT_CN;
1655		}
1656		goto again;
1657	}
1658
1659	gen = q->gen;
1660	q->in_use += ndesc;
1661	pidx = q->pidx;
1662	q->pidx += ndesc;
1663	if (q->pidx >= q->size) {
1664		q->pidx -= q->size;
1665		q->gen ^= 1;
1666	}
1667	spin_unlock(&q->lock);
1668
1669	write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1670	check_ring_tx_db(adap, q);
1671	return NET_XMIT_SUCCESS;
1672}
1673
1674/**
1675 *	restart_offloadq - restart a suspended offload queue
1676 *	@qs: the queue set containing the offload queue
1677 *
1678 *	Resumes transmission on a suspended Tx offload queue.
1679 */
1680static void restart_offloadq(unsigned long data)
1681{
1682	struct sk_buff *skb;
1683	struct sge_qset *qs = (struct sge_qset *)data;
1684	struct sge_txq *q = &qs->txq[TXQ_OFLD];
1685	const struct port_info *pi = netdev_priv(qs->netdev);
1686	struct adapter *adap = pi->adapter;
1687
1688	spin_lock(&q->lock);
1689again:	reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
1690
1691	while ((skb = skb_peek(&q->sendq)) != NULL) {
1692		unsigned int gen, pidx;
1693		unsigned int ndesc = skb->priority;
1694
1695		if (unlikely(q->size - q->in_use < ndesc)) {
1696			set_bit(TXQ_OFLD, &qs->txq_stopped);
1697			smp_mb__after_clear_bit();
1698
1699			if (should_restart_tx(q) &&
1700			    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1701				goto again;
1702			q->stops++;
1703			break;
1704		}
1705
1706		gen = q->gen;
1707		q->in_use += ndesc;
1708		pidx = q->pidx;
1709		q->pidx += ndesc;
1710		if (q->pidx >= q->size) {
1711			q->pidx -= q->size;
1712			q->gen ^= 1;
1713		}
1714		__skb_unlink(skb, &q->sendq);
1715		spin_unlock(&q->lock);
1716
1717		write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1718		spin_lock(&q->lock);
1719	}
1720	spin_unlock(&q->lock);
1721
1722#if USE_GTS
1723	set_bit(TXQ_RUNNING, &q->flags);
1724	set_bit(TXQ_LAST_PKT_DB, &q->flags);
1725#endif
1726	wmb();
1727	t3_write_reg(adap, A_SG_KDOORBELL,
1728		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1729}
1730
1731/**
1732 *	queue_set - return the queue set a packet should use
1733 *	@skb: the packet
1734 *
1735 *	Maps a packet to the SGE queue set it should use.  The desired queue
1736 *	set is carried in bits 1-3 of the packet's priority.
1737 */
1738static inline int queue_set(const struct sk_buff *skb)
1739{
1740	return skb->priority >> 1;
1741}
1742
1743/**
1744 *	is_ctrl_pkt - return whether an offload packet is a control packet
1745 *	@skb: the packet
1746 *
1747 *	Determines whether an offload packet should use an OFLD or a CTRL
1748 *	Tx queue.  This is indicated by bit 0 in the packet's priority.
1749 */
1750static inline int is_ctrl_pkt(const struct sk_buff *skb)
1751{
1752	return skb->priority & 1;
1753}
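
/*
 * Example of the priority encoding used by queue_set()/is_ctrl_pkt():
 * skb->priority = 5 (binary 101) selects the CTRL queue of queue set 2,
 * while priority = 4 (binary 100) selects the OFLD queue of the same set;
 * t3_offload_tx() below dispatches accordingly.
 */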
1754
1755/**
1756 *	t3_offload_tx - send an offload packet
1757 *	@tdev: the offload device to send to
1758 *	@skb: the packet
1759 *
1760 *	Sends an offload packet.  We use the packet priority to select the
1761 *	appropriate Tx queue as follows: bit 0 indicates whether the packet
1762 *	should be sent as regular or control, bits 1-3 select the queue set.
1763 */
1764int t3_offload_tx(struct t3cdev *tdev, struct sk_buff *skb)
1765{
1766	struct adapter *adap = tdev2adap(tdev);
1767	struct sge_qset *qs = &adap->sge.qs[queue_set(skb)];
1768
1769	if (unlikely(is_ctrl_pkt(skb)))
1770		return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb);
1771
1772	return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb);
1773}
1774
1775/**
1776 *	offload_enqueue - add an offload packet to an SGE offload receive queue
1777 *	@q: the SGE response queue
1778 *	@skb: the packet
1779 *
1780 *	Add a new offload packet to an SGE response queue's offload packet
1781 *	queue.  If the packet is the first on the queue it schedules the RX
1782 *	softirq to process the queue.
1783 */
1784static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
1785{
1786	int was_empty = skb_queue_empty(&q->rx_queue);
1787
1788	__skb_queue_tail(&q->rx_queue, skb);
1789
1790	if (was_empty) {
1791		struct sge_qset *qs = rspq_to_qset(q);
1792
1793		napi_schedule(&qs->napi);
1794	}
1795}
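
/*
 * Note: offload_enqueue() uses the unlocked __skb_queue_tail(), so it relies
 * on the caller to serialize access to q->rx_queue.  In this driver the
 * response queue lock held by the interrupt handlers provides that, and
 * ofld_poll() takes the same lock before splicing the queue out.
 */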
1796
1797/**
1798 *	deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1799 *	@tdev: the offload device that will be receiving the packets
1800 *	@q: the SGE response queue that assembled the bundle
1801 *	@skbs: the partial bundle
1802 *	@n: the number of packets in the bundle
1803 *
1804 *	Delivers a (partial) bundle of Rx offload packets to an offload device.
1805 */
1806static inline void deliver_partial_bundle(struct t3cdev *tdev,
1807					  struct sge_rspq *q,
1808					  struct sk_buff *skbs[], int n)
1809{
1810	if (n) {
1811		q->offload_bundles++;
1812		tdev->recv(tdev, skbs, n);
1813	}
1814}
1815
1816/**
1817 *	ofld_poll - NAPI handler for offload packets in interrupt mode
1818 *	@napi: the napi instance of the response queue being polled
1819 *	@budget: polling budget
1820 *
1821 *	The NAPI handler for offload packets when a response queue is serviced
1822 *	by the hard interrupt handler, i.e., when it's operating in non-polling
1823 *	mode.  Creates small packet batches and sends them through the offload
1824 *	receive handler.  Batches need to be of modest size as we do prefetches
1825 *	on the packets in each.
1826 */
1827static int ofld_poll(struct napi_struct *napi, int budget)
1828{
1829	struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
1830	struct sge_rspq *q = &qs->rspq;
1831	struct adapter *adapter = qs->adap;
1832	int work_done = 0;
1833
1834	while (work_done < budget) {
1835		struct sk_buff *skb, *tmp, *skbs[RX_BUNDLE_SIZE];
1836		struct sk_buff_head queue;
1837		int ngathered;
1838
1839		spin_lock_irq(&q->lock);
1840		__skb_queue_head_init(&queue);
1841		skb_queue_splice_init(&q->rx_queue, &queue);
1842		if (skb_queue_empty(&queue)) {
1843			napi_complete(napi);
1844			spin_unlock_irq(&q->lock);
1845			return work_done;
1846		}
1847		spin_unlock_irq(&q->lock);
1848
1849		ngathered = 0;
1850		skb_queue_walk_safe(&queue, skb, tmp) {
1851			if (work_done >= budget)
1852				break;
1853			work_done++;
1854
1855			__skb_unlink(skb, &queue);
1856			prefetch(skb->data);
1857			skbs[ngathered] = skb;
1858			if (++ngathered == RX_BUNDLE_SIZE) {
1859				q->offload_bundles++;
1860				adapter->tdev.recv(&adapter->tdev, skbs,
1861						   ngathered);
1862				ngathered = 0;
1863			}
1864		}
1865		if (!skb_queue_empty(&queue)) {
1866			/* splice remaining packets back onto Rx queue */
1867			spin_lock_irq(&q->lock);
1868			skb_queue_splice(&queue, &q->rx_queue);
1869			spin_unlock_irq(&q->lock);
1870		}
1871		deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
1872	}
1873
1874	return work_done;
1875}
1876
1877/**
1878 *	rx_offload - process a received offload packet
1879 *	@tdev: the offload device receiving the packet
1880 *	@rq: the response queue that received the packet
1881 *	@skb: the packet
1882 *	@rx_gather: a gather list of packets if we are building a bundle
1883 *	@gather_idx: index of the next available slot in the bundle
1884 *
1885 *	Process an ingress offload packet and add it to the offload ingress
1886 *	queue.  Returns the index of the next available slot in the bundle.
1887 */
1888static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
1889			     struct sk_buff *skb, struct sk_buff *rx_gather[],
1890			     unsigned int gather_idx)
1891{
1892	skb_reset_mac_header(skb);
1893	skb_reset_network_header(skb);
1894	skb_reset_transport_header(skb);
1895
1896	if (rq->polling) {
1897		rx_gather[gather_idx++] = skb;
1898		if (gather_idx == RX_BUNDLE_SIZE) {
1899			tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1900			gather_idx = 0;
1901			rq->offload_bundles++;
1902		}
1903	} else
1904		offload_enqueue(rq, skb);
1905
1906	return gather_idx;
1907}
1908
1909/**
1910 *	restart_tx - check whether to restart suspended Tx queues
1911 *	@qs: the queue set to resume
1912 *
1913 *	Restarts suspended Tx queues of an SGE queue set if they have enough
1914 *	free resources to resume operation.
1915 */
1916static void restart_tx(struct sge_qset *qs)
1917{
1918	if (test_bit(TXQ_ETH, &qs->txq_stopped) &&
1919	    should_restart_tx(&qs->txq[TXQ_ETH]) &&
1920	    test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1921		qs->txq[TXQ_ETH].restarts++;
1922		if (netif_running(qs->netdev))
1923			netif_tx_wake_queue(qs->tx_q);
1924	}
1925
1926	if (test_bit(TXQ_OFLD, &qs->txq_stopped) &&
1927	    should_restart_tx(&qs->txq[TXQ_OFLD]) &&
1928	    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
1929		qs->txq[TXQ_OFLD].restarts++;
1930		tasklet_schedule(&qs->txq[TXQ_OFLD].qresume_tsk);
1931	}
1932	if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
1933	    should_restart_tx(&qs->txq[TXQ_CTRL]) &&
1934	    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
1935		qs->txq[TXQ_CTRL].restarts++;
1936		tasklet_schedule(&qs->txq[TXQ_CTRL].qresume_tsk);
1937	}
1938}
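
/*
 * Note the asymmetry above: the Ethernet Tx queue is resumed by waking its
 * netdev Tx queue so the stack resumes feeding packets, while the offload and
 * control queues are drained by scheduling their qresume tasklets
 * (restart_offloadq() and restart_ctrlq(), respectively).
 */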
1939
1940/**
1941 *	cxgb3_arp_process - process an ARP request probing a private IP address
1942 *	@pi: the port information for the interface that received the request
1943 *	@skb: the skbuff containing the ARP request
1944 *
1945 *	Check if the ARP request is probing the private IP address
1946 *	dedicated to iSCSI, generate an ARP reply if so.
1947 */
1948static void cxgb3_arp_process(struct port_info *pi, struct sk_buff *skb)
1949{
1950	struct net_device *dev = skb->dev;
1951	struct arphdr *arp;
1952	unsigned char *arp_ptr;
1953	unsigned char *sha;
1954	__be32 sip, tip;
1955
1956	if (!dev)
1957		return;
1958
1959	skb_reset_network_header(skb);
1960	arp = arp_hdr(skb);
1961
1962	if (arp->ar_op != htons(ARPOP_REQUEST))
1963		return;
1964
1965	arp_ptr = (unsigned char *)(arp + 1);
1966	sha = arp_ptr;
1967	arp_ptr += dev->addr_len;
1968	memcpy(&sip, arp_ptr, sizeof(sip));
1969	arp_ptr += sizeof(sip);
1970	arp_ptr += dev->addr_len;
1971	memcpy(&tip, arp_ptr, sizeof(tip));
1972
1973	if (tip != pi->iscsi_ipv4addr)
1974		return;
1975
1976	arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
1977		 pi->iscsic.mac_addr, sha);
1978
1979}
1980
1981static inline int is_arp(struct sk_buff *skb)
1982{
1983	return skb->protocol == htons(ETH_P_ARP);
1984}
1985
1986static void cxgb3_process_iscsi_prov_pack(struct port_info *pi,
1987					struct sk_buff *skb)
1988{
1989	if (is_arp(skb)) {
1990		cxgb3_arp_process(pi, skb);
1991		return;
1992	}
1993
1994	if (pi->iscsic.recv)
1995		pi->iscsic.recv(pi, skb);
1996
1997}
1998
1999/**
2000 *	rx_eth - process an ingress ethernet packet
2001 *	@adap: the adapter
2002 *	@rq: the response queue that received the packet
2003 *	@skb: the packet
2004 *	@pad: amount of padding at the start of the buffer
2005 *
2006 *	Process an ingress ethernet packet and deliver it to the stack.
2007 *	The padding is 2 if the packet was delivered in an Rx buffer and 0
2008 *	if it was immediate data in a response.
2009 */
2010static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
2011		   struct sk_buff *skb, int pad, int lro)
2012{
2013	struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad);
2014	struct sge_qset *qs = rspq_to_qset(rq);
2015	struct port_info *pi;
2016
2017	skb_pull(skb, sizeof(*p) + pad);
2018	skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
2019	pi = netdev_priv(skb->dev);
2020	if ((pi->rx_offload & T3_RX_CSUM) && p->csum_valid &&
2021	    p->csum == htons(0xffff) && !p->fragment) {
2022		qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2023		skb->ip_summed = CHECKSUM_UNNECESSARY;
2024	} else
2025		skb->ip_summed = CHECKSUM_NONE;
2026	skb_record_rx_queue(skb, qs - &adap->sge.qs[0]);
2027
2028	if (unlikely(p->vlan_valid)) {
2029		struct vlan_group *grp = pi->vlan_grp;
2030
2031		qs->port_stats[SGE_PSTAT_VLANEX]++;
2032		if (likely(grp))
2033			if (lro)
2034				vlan_gro_receive(&qs->napi, grp,
2035						 ntohs(p->vlan), skb);
2036			else {
2037				if (unlikely(pi->iscsic.flags)) {
2038					unsigned short vtag = ntohs(p->vlan) &
2039								VLAN_VID_MASK;
2040					skb->dev = vlan_group_get_device(grp,
2041									 vtag);
2042					cxgb3_process_iscsi_prov_pack(pi, skb);
2043				}
2044				__vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
2045						  rq->polling);
2046			}
2047		else
2048			dev_kfree_skb_any(skb);
2049	} else if (rq->polling) {
2050		if (lro)
2051			napi_gro_receive(&qs->napi, skb);
2052		else {
2053			if (unlikely(pi->iscsic.flags))
2054				cxgb3_process_iscsi_prov_pack(pi, skb);
2055			netif_receive_skb(skb);
2056		}
2057	} else
2058		netif_rx(skb);
2059}
2060
2061static inline int is_eth_tcp(u32 rss)
2062{
2063	return G_HASHTYPE(ntohl(rss)) == RSS_HASH_4_TUPLE;
2064}
2065
2066/**
2067 *	lro_add_page - add a page chunk to an LRO session
2068 *	@adap: the adapter
2069 *	@qs: the associated queue set
2070 *	@fl: the free list containing the page chunk to add
2071 *	@len: packet length
2072 *	@complete: Indicates the last fragment of a frame
2073 *
2074 *	Add a received packet contained in a page chunk to an existing LRO
2075 *	session.
2076 */
2077static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
2078			 struct sge_fl *fl, int len, int complete)
2079{
2080	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2081	struct port_info *pi = netdev_priv(qs->netdev);
2082	struct sk_buff *skb = NULL;
2083	struct cpl_rx_pkt *cpl;
2084	struct skb_frag_struct *rx_frag;
2085	int nr_frags;
2086	int offset = 0;
2087
2088	if (!qs->nomem) {
2089		skb = napi_get_frags(&qs->napi);
2090		qs->nomem = !skb;
2091	}
2092
2093	fl->credits--;
2094
2095	pci_dma_sync_single_for_cpu(adap->pdev,
2096				    dma_unmap_addr(sd, dma_addr),
2097				    fl->buf_size - SGE_PG_RSVD,
2098				    PCI_DMA_FROMDEVICE);
2099
2100	(*sd->pg_chunk.p_cnt)--;
2101	if (!*sd->pg_chunk.p_cnt && sd->pg_chunk.page != fl->pg_chunk.page)
2102		pci_unmap_page(adap->pdev,
2103			       sd->pg_chunk.mapping,
2104			       fl->alloc_size,
2105			       PCI_DMA_FROMDEVICE);
2106
2107	if (!skb) {
2108		put_page(sd->pg_chunk.page);
2109		if (complete)
2110			qs->nomem = 0;
2111		return;
2112	}
2113
2114	rx_frag = skb_shinfo(skb)->frags;
2115	nr_frags = skb_shinfo(skb)->nr_frags;
2116
2117	if (!nr_frags) {
2118		offset = 2 + sizeof(struct cpl_rx_pkt);
2119		cpl = qs->lro_va = sd->pg_chunk.va + 2;
2120
2121		if ((pi->rx_offload & T3_RX_CSUM) &&
2122		     cpl->csum_valid && cpl->csum == htons(0xffff)) {
2123			skb->ip_summed = CHECKSUM_UNNECESSARY;
2124			qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2125		} else
2126			skb->ip_summed = CHECKSUM_NONE;
2127	} else
2128		cpl = qs->lro_va;
2129
2130	len -= offset;
2131
2132	rx_frag += nr_frags;
2133	rx_frag->page = sd->pg_chunk.page;
2134	rx_frag->page_offset = sd->pg_chunk.offset + offset;
2135	rx_frag->size = len;
2136
2137	skb->len += len;
2138	skb->data_len += len;
2139	skb->truesize += len;
2140	skb_shinfo(skb)->nr_frags++;
2141
2142	if (!complete)
2143		return;
2144
2145	skb_record_rx_queue(skb, qs - &adap->sge.qs[0]);
2146
2147	if (unlikely(cpl->vlan_valid)) {
2148		struct vlan_group *grp = pi->vlan_grp;
2149
2150		if (likely(grp != NULL)) {
2151			vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan));
2152			return;
2153		}
2154	}
2155	napi_gro_frags(&qs->napi);
2156}
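
/*
 * Summary of the GRO page path above: napi_get_frags() supplies (or reuses)
 * a fragment-only skb, each page chunk received for the frame is appended as
 * one skb fragment, and only once @complete is set is the assembled frame
 * handed to napi_gro_frags()/vlan_gro_frags() for coalescing.
 */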
2157
2158/**
2159 *	handle_rsp_cntrl_info - handles control information in a response
2160 *	@qs: the queue set corresponding to the response
2161 *	@flags: the response control flags
2162 *
2163 *	Handles the control information of an SGE response, such as GTS
2164 *	indications and completion credits for the queue set's Tx queues.
2165 *	HW coalesces credits, we don't do any extra SW coalescing.
2166 */
2167static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags)
2168{
2169	unsigned int credits;
2170
2171#if USE_GTS
2172	if (flags & F_RSPD_TXQ0_GTS)
2173		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2174#endif
2175
2176	credits = G_RSPD_TXQ0_CR(flags);
2177	if (credits)
2178		qs->txq[TXQ_ETH].processed += credits;
2179
2180	credits = G_RSPD_TXQ2_CR(flags);
2181	if (credits)
2182		qs->txq[TXQ_CTRL].processed += credits;
2183
2184# if USE_GTS
2185	if (flags & F_RSPD_TXQ1_GTS)
2186		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2187# endif
2188	credits = G_RSPD_TXQ1_CR(flags);
2189	if (credits)
2190		qs->txq[TXQ_OFLD].processed += credits;
2191}
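
/*
 * The .processed counters updated above feed the Tx descriptor reclaim logic
 * and check_ring_db() below, which compares cleaned + in_use against
 * processed to decide whether a queue still needs its doorbell rung.
 */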
2192
2193/**
2194 *	check_ring_db - check if we need to ring any doorbells
2195 *	@adapter: the adapter
2196 *	@qs: the queue set whose Tx queues are to be examined
2197 *	@sleeping: indicates which Tx queue sent GTS
2198 *
2199 *	Checks if some of a queue set's Tx queues need to ring their doorbells
2200 *	to resume transmission after idling while they still have unprocessed
2201 *	descriptors.
2202 */
2203static void check_ring_db(struct adapter *adap, struct sge_qset *qs,
2204			  unsigned int sleeping)
2205{
2206	if (sleeping & F_RSPD_TXQ0_GTS) {
2207		struct sge_txq *txq = &qs->txq[TXQ_ETH];
2208
2209		if (txq->cleaned + txq->in_use != txq->processed &&
2210		    !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2211			set_bit(TXQ_RUNNING, &txq->flags);
2212			t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2213				     V_EGRCNTX(txq->cntxt_id));
2214		}
2215	}
2216
2217	if (sleeping & F_RSPD_TXQ1_GTS) {
2218		struct sge_txq *txq = &qs->txq[TXQ_OFLD];
2219
2220		if (txq->cleaned + txq->in_use != txq->processed &&
2221		    !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2222			set_bit(TXQ_RUNNING, &txq->flags);
2223			t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2224				     V_EGRCNTX(txq->cntxt_id));
2225		}
2226	}
2227}
2228
2229/**
2230 *	is_new_response - check if a response is newly written
2231 *	@r: the response descriptor
2232 *	@q: the response queue
2233 *
2234 *	Returns true if a response descriptor contains a yet unprocessed
2235 *	response.
2236 */
2237static inline int is_new_response(const struct rsp_desc *r,
2238				  const struct sge_rspq *q)
2239{
2240	return (r->intr_gen & F_RSPD_GEN2) == q->gen;
2241}
2242
2243static inline void clear_rspq_bufstate(struct sge_rspq * const q)
2244{
2245	q->pg_skb = NULL;
2246	q->rx_recycle_buf = 0;
2247}
2248
2249#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
2250#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
2251			V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
2252			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
2253			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
2254
2255/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
2256#define NOMEM_INTR_DELAY 2500
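/* i.e. 2500 ticks of 0.1 us, roughly 250 us of extra interrupt holdoff */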
2257
2258/**
2259 *	process_responses - process responses from an SGE response queue
2260 *	@adap: the adapter
2261 *	@qs: the queue set to which the response queue belongs
2262 *	@budget: how many responses can be processed in this round
2263 *
2264 *	Process responses from an SGE response queue up to the supplied budget.
2265 *	Responses include received packets as well as credits and other events
2266 *	for the queues that belong to the response queue's queue set.
2267 *	A negative budget is effectively unlimited.
2268 *
2269 *	Additionally choose the interrupt holdoff time for the next interrupt
2270 *	on this queue.  If the system is under memory shortage use a fairly
2271 *	long delay to help recovery.
2272 */
2273static int process_responses(struct adapter *adap, struct sge_qset *qs,
2274			     int budget)
2275{
2276	struct sge_rspq *q = &qs->rspq;
2277	struct rsp_desc *r = &q->desc[q->cidx];
2278	int budget_left = budget;
2279	unsigned int sleeping = 0;
2280	struct sk_buff *offload_skbs[RX_BUNDLE_SIZE];
2281	int ngathered = 0;
2282
2283	q->next_holdoff = q->holdoff_tmr;
2284
2285	while (likely(budget_left && is_new_response(r, q))) {
2286		int packet_complete, eth, ethpad = 2, lro = qs->lro_enabled;
2287		struct sk_buff *skb = NULL;
2288		u32 len, flags;
2289		__be32 rss_hi, rss_lo;
2290
2291		rmb();
2292		eth = r->rss_hdr.opcode == CPL_RX_PKT;
2293		rss_hi = *(const __be32 *)r;
2294		rss_lo = r->rss_hdr.rss_hash_val;
2295		flags = ntohl(r->flags);
2296
2297		if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) {
2298			skb = alloc_skb(AN_PKT_SIZE, GFP_ATOMIC);
2299			if (!skb)
2300				goto no_mem;
2301
2302			memcpy(__skb_put(skb, AN_PKT_SIZE), r, AN_PKT_SIZE);
2303			skb->data[0] = CPL_ASYNC_NOTIF;
2304			rss_hi = htonl(CPL_ASYNC_NOTIF << 24);
2305			q->async_notif++;
2306		} else if (flags & F_RSPD_IMM_DATA_VALID) {
2307			skb = get_imm_packet(r);
2308			if (unlikely(!skb)) {
2309no_mem:
2310				q->next_holdoff = NOMEM_INTR_DELAY;
2311				q->nomem++;
2312				/* consume one credit since we tried */
2313				budget_left--;
2314				break;
2315			}
2316			q->imm_data++;
2317			ethpad = 0;
2318		} else if ((len = ntohl(r->len_cq)) != 0) {
2319			struct sge_fl *fl;
2320
2321			lro &= eth && is_eth_tcp(rss_hi);
2322
2323			fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2324			if (fl->use_pages) {
2325				void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
2326
2327				prefetch(addr);
2328#if L1_CACHE_BYTES < 128
2329				prefetch(addr + L1_CACHE_BYTES);
2330#endif
2331				__refill_fl(adap, fl);
2332				if (lro > 0) {
2333					lro_add_page(adap, qs, fl,
2334						     G_RSPD_LEN(len),
2335						     flags & F_RSPD_EOP);
2336					goto next_fl;
2337				}
2338
2339				skb = get_packet_pg(adap, fl, q,
2340						    G_RSPD_LEN(len),
2341						    eth ?
2342						    SGE_RX_DROP_THRES : 0);
2343				q->pg_skb = skb;
2344			} else
2345				skb = get_packet(adap, fl, G_RSPD_LEN(len),
2346						 eth ? SGE_RX_DROP_THRES : 0);
2347			if (unlikely(!skb)) {
2348				if (!eth)
2349					goto no_mem;
2350				q->rx_drops++;
2351			} else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT))
2352				__skb_pull(skb, 2);
2353next_fl:
2354			if (++fl->cidx == fl->size)
2355				fl->cidx = 0;
2356		} else
2357			q->pure_rsps++;
2358
2359		if (flags & RSPD_CTRL_MASK) {
2360			sleeping |= flags & RSPD_GTS_MASK;
2361			handle_rsp_cntrl_info(qs, flags);
2362		}
2363
2364		r++;
2365		if (unlikely(++q->cidx == q->size)) {
2366			q->cidx = 0;
2367			q->gen ^= 1;
2368			r = q->desc;
2369		}
2370		prefetch(r);
2371
2372		if (++q->credits >= (q->size / 4)) {
2373			refill_rspq(adap, q, q->credits);
2374			q->credits = 0;
2375		}
2376
2377		packet_complete = flags &
2378				  (F_RSPD_EOP | F_RSPD_IMM_DATA_VALID |
2379				   F_RSPD_ASYNC_NOTIF);
2380
2381		if (skb != NULL && packet_complete) {
2382			if (eth)
2383				rx_eth(adap, q, skb, ethpad, lro);
2384			else {
2385				q->offload_pkts++;
2386				/* Preserve the RSS info in csum & priority */
2387				skb->csum = rss_hi;
2388				skb->priority = rss_lo;
2389				ngathered = rx_offload(&adap->tdev, q, skb,
2390						       offload_skbs,
2391						       ngathered);
2392			}
2393
2394			if (flags & F_RSPD_EOP)
2395				clear_rspq_bufstate(q);
2396		}
2397		--budget_left;
2398	}
2399
2400	deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
2401
2402	if (sleeping)
2403		check_ring_db(adap, qs, sleeping);
2404
2405	smp_mb();		/* commit Tx queue .processed updates */
2406	if (unlikely(qs->txq_stopped != 0))
2407		restart_tx(qs);
2408
2409	budget -= budget_left;
2410	return budget;
2411}
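
/*
 * The value returned above is the number of responses consumed
 * (budget - budget_left); napi_rx_handler() compares it against its budget to
 * decide whether to complete NAPI polling.
 */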
2412
2413static inline int is_pure_response(const struct rsp_desc *r)
2414{
2415	__be32 n = r->flags & htonl(F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID);
2416
2417	return (n | r->len_cq) == 0;
2418}
2419
2420/**
2421 *	napi_rx_handler - the NAPI handler for Rx processing
2422 *	@napi: the napi instance
2423 *	@budget: how many packets we can process in this round
2424 *
2425 *	Handler for new data events when using NAPI.
2426 */
2427static int napi_rx_handler(struct napi_struct *napi, int budget)
2428{
2429	struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
2430	struct adapter *adap = qs->adap;
2431	int work_done = process_responses(adap, qs, budget);
2432
2433	if (likely(work_done < budget)) {
2434		napi_complete(napi);
2435
2436		/*
2437		 * Because we don't atomically flush the following
2438		 * write it is possible that in very rare cases it can
2439		 * reach the device in a way that races with a new
2440		 * response being written plus an error interrupt
2441		 * causing the NAPI interrupt handler below to return
2442		 * unhandled status to the OS.  To protect against
2443		 * this would require flushing the write and doing
2444		 * both the write and the flush with interrupts off.
2445		 * Way too expensive and unjustifiable given the
2446		 * rarity of the race.
2447		 *
2448		 * The race cannot happen at all with MSI-X.
2449		 */
2450		t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2451			     V_NEWTIMER(qs->rspq.next_holdoff) |
2452			     V_NEWINDEX(qs->rspq.cidx));
2453	}
2454	return work_done;
2455}
2456
2457/*
2458 * Returns true if the device is already scheduled for polling.
2459 */
2460static inline int napi_is_scheduled(struct napi_struct *napi)
2461{
2462	return test_bit(NAPI_STATE_SCHED, &napi->state);
2463}
2464
2465/**
2466 *	process_pure_responses - process pure responses from a response queue
2467 *	@adap: the adapter
2468 *	@qs: the queue set owning the response queue
2469 *	@r: the first pure response to process
2470 *
2471 *	A simpler version of process_responses() that handles only pure (i.e.,
2472 *	non data-carrying) responses.  Such responses are too light-weight to
2473 *	justify calling a softirq under NAPI, so we handle them specially in
2474 *	the interrupt handler.  The function is called with a pointer to a
2475 *	response, which the caller must ensure is a valid pure response.
2476 *
2477 *	Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
2478 */
2479static int process_pure_responses(struct adapter *adap, struct sge_qset *qs,
2480				  struct rsp_desc *r)
2481{
2482	struct sge_rspq *q = &qs->rspq;
2483	unsigned int sleeping = 0;
2484
2485	do {
2486		u32 flags = ntohl(r->flags);
2487
2488		r++;
2489		if (unlikely(++q->cidx == q->size)) {
2490			q->cidx = 0;
2491			q->gen ^= 1;
2492			r = q->desc;
2493		}
2494		prefetch(r);
2495
2496		if (flags & RSPD_CTRL_MASK) {
2497			sleeping |= flags & RSPD_GTS_MASK;
2498			handle_rsp_cntrl_info(qs, flags);
2499		}
2500
2501		q->pure_rsps++;
2502		if (++q->credits >= (q->size / 4)) {
2503			refill_rspq(adap, q, q->credits);
2504			q->credits = 0;
2505		}
2506		if (!is_new_response(r, q))
2507			break;
2508		rmb();
2509	} while (is_pure_response(r));
2510
2511	if (sleeping)
2512		check_ring_db(adap, qs, sleeping);
2513
2514	smp_mb();		/* commit Tx queue .processed updates */
2515	if (unlikely(qs->txq_stopped != 0))
2516		restart_tx(qs);
2517
2518	return is_new_response(r, q);
2519}
2520
2521/**
2522 *	handle_responses - decide what to do with new responses in NAPI mode
2523 *	@adap: the adapter
2524 *	@q: the response queue
2525 *
2526 *	This is used by the NAPI interrupt handlers to decide what to do with
2527 *	new SGE responses.  If there are no new responses it returns -1.  If
2528 *	there are new responses and they are pure (i.e., non-data carrying)
2529 *	it handles them straight in hard interrupt context as they are very
2530 *	cheap and don't deliver any packets.  Finally, if there are any data
2531 *	signaling responses it schedules the NAPI handler.  Returns 1 if it
2532 *	schedules NAPI, 0 if all new responses were pure.
2533 *
2534 *	The caller must ascertain NAPI is not already running.
2535 */
2536static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
2537{
2538	struct sge_qset *qs = rspq_to_qset(q);
2539	struct rsp_desc *r = &q->desc[q->cidx];
2540
2541	if (!is_new_response(r, q))
2542		return -1;
2543	rmb();
2544	if (is_pure_response(r) && process_pure_responses(adap, qs, r) == 0) {
2545		t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2546			     V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
2547		return 0;
2548	}
2549	napi_schedule(&qs->napi);
2550	return 1;
2551}
2552
2553/*
2554 * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
2555 * (i.e., response queue serviced in hard interrupt).
2556 */
2557irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
2558{
2559	struct sge_qset *qs = cookie;
2560	struct adapter *adap = qs->adap;
2561	struct sge_rspq *q = &qs->rspq;
2562
2563	spin_lock(&q->lock);
2564	if (process_responses(adap, qs, -1) == 0)
2565		q->unhandled_irqs++;
2566	t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2567		     V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2568	spin_unlock(&q->lock);
2569	return IRQ_HANDLED;
2570}
2571
2572/*
2573 * The MSI-X interrupt handler for an SGE response queue for the NAPI case
2574 * (i.e., response queue serviced by NAPI polling).
2575 */
2576static irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
2577{
2578	struct sge_qset *qs = cookie;
2579	struct sge_rspq *q = &qs->rspq;
2580
2581	spin_lock(&q->lock);
2582
2583	if (handle_responses(qs->adap, q) < 0)
2584		q->unhandled_irqs++;
2585	spin_unlock(&q->lock);
2586	return IRQ_HANDLED;
2587}
2588
2589/*
2590 * The non-NAPI MSI interrupt handler.  This needs to handle data events from
2591 * SGE response queues as well as error and other async events as they all use
2592 * the same MSI vector.  We use one SGE response queue per port in this mode
2593 * and protect all response queues with queue 0's lock.
2594 */
2595static irqreturn_t t3_intr_msi(int irq, void *cookie)
2596{
2597	int new_packets = 0;
2598	struct adapter *adap = cookie;
2599	struct sge_rspq *q = &adap->sge.qs[0].rspq;
2600
2601	spin_lock(&q->lock);
2602
2603	if (process_responses(adap, &adap->sge.qs[0], -1)) {
2604		t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2605			     V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2606		new_packets = 1;
2607	}
2608
2609	if (adap->params.nports == 2 &&
2610	    process_responses(adap, &adap->sge.qs[1], -1)) {
2611		struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2612
2613		t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) |
2614			     V_NEWTIMER(q1->next_holdoff) |
2615			     V_NEWINDEX(q1->cidx));
2616		new_packets = 1;
2617	}
2618
2619	if (!new_packets && t3_slow_intr_handler(adap) == 0)
2620		q->unhandled_irqs++;
2621
2622	spin_unlock(&q->lock);
2623	return IRQ_HANDLED;
2624}
2625
2626static int rspq_check_napi(struct sge_qset *qs)
2627{
2628	struct sge_rspq *q = &qs->rspq;
2629
2630	if (!napi_is_scheduled(&qs->napi) &&
2631	    is_new_response(&q->desc[q->cidx], q)) {
2632		napi_schedule(&qs->napi);
2633		return 1;
2634	}
2635	return 0;
2636}
2637
2638/*
2639 * The MSI interrupt handler for the NAPI case (i.e., response queues serviced
2640 * by NAPI polling).  Handles data events from SGE response queues as well as
2641 * error and other async events as they all use the same MSI vector.  We use
2642 * one SGE response queue per port in this mode and protect all response
2643 * queues with queue 0's lock.
2644 */
2645static irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
2646{
2647	int new_packets;
2648	struct adapter *adap = cookie;
2649	struct sge_rspq *q = &adap->sge.qs[0].rspq;
2650
2651	spin_lock(&q->lock);
2652
2653	new_packets = rspq_check_napi(&adap->sge.qs[0]);
2654	if (adap->params.nports == 2)
2655		new_packets += rspq_check_napi(&adap->sge.qs[1]);
2656	if (!new_packets && t3_slow_intr_handler(adap) == 0)
2657		q->unhandled_irqs++;
2658
2659	spin_unlock(&q->lock);
2660	return IRQ_HANDLED;
2661}
2662
2663/*
2664 * A helper function that processes responses and issues GTS.
2665 */
2666static inline int process_responses_gts(struct adapter *adap,
2667					struct sge_rspq *rq)
2668{
2669	int work;
2670
2671	work = process_responses(adap, rspq_to_qset(rq), -1);
2672	t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2673		     V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2674	return work;
2675}
2676
2677/*
2678 * The legacy INTx interrupt handler.  This needs to handle data events from
2679 * SGE response queues as well as error and other async events as they all use
2680 * the same interrupt pin.  We use one SGE response queue per port in this mode
2681 * and protect all response queues with queue 0's lock.
2682 */
2683static irqreturn_t t3_intr(int irq, void *cookie)
2684{
2685	int work_done, w0, w1;
2686	struct adapter *adap = cookie;
2687	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2688	struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2689
2690	spin_lock(&q0->lock);
2691
2692	w0 = is_new_response(&q0->desc[q0->cidx], q0);
2693	w1 = adap->params.nports == 2 &&
2694	    is_new_response(&q1->desc[q1->cidx], q1);
2695
2696	if (likely(w0 | w1)) {
2697		t3_write_reg(adap, A_PL_CLI, 0);
2698		t3_read_reg(adap, A_PL_CLI);	/* flush */
2699
2700		if (likely(w0))
2701			process_responses_gts(adap, q0);
2702
2703		if (w1)
2704			process_responses_gts(adap, q1);
2705
2706		work_done = w0 | w1;
2707	} else
2708		work_done = t3_slow_intr_handler(adap);
2709
2710	spin_unlock(&q0->lock);
2711	return IRQ_RETVAL(work_done != 0);
2712}
2713
2714/*
2715 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2716 * Handles data events from SGE response queues as well as error and other
2717 * async events as they all use the same interrupt pin.  We use one SGE
2718 * response queue per port in this mode and protect all response queues with
2719 * queue 0's lock.
2720 */
2721static irqreturn_t t3b_intr(int irq, void *cookie)
2722{
2723	u32 map;
2724	struct adapter *adap = cookie;
2725	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2726
2727	t3_write_reg(adap, A_PL_CLI, 0);
2728	map = t3_read_reg(adap, A_SG_DATA_INTR);
2729
2730	if (unlikely(!map))	/* shared interrupt, most likely */
2731		return IRQ_NONE;
2732
2733	spin_lock(&q0->lock);
2734
2735	if (unlikely(map & F_ERRINTR))
2736		t3_slow_intr_handler(adap);
2737
2738	if (likely(map & 1))
2739		process_responses_gts(adap, q0);
2740
2741	if (map & 2)
2742		process_responses_gts(adap, &adap->sge.qs[1].rspq);
2743
2744	spin_unlock(&q0->lock);
2745	return IRQ_HANDLED;
2746}
2747
2748/*
2749 * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
2750 * Handles data events from SGE response queues as well as error and other
2751 * async events as they all use the same interrupt pin.  We use one SGE
2752 * response queue per port in this mode and protect all response queues with
2753 * queue 0's lock.
2754 */
2755static irqreturn_t t3b_intr_napi(int irq, void *cookie)
2756{
2757	u32 map;
2758	struct adapter *adap = cookie;
2759	struct sge_qset *qs0 = &adap->sge.qs[0];
2760	struct sge_rspq *q0 = &qs0->rspq;
2761
2762	t3_write_reg(adap, A_PL_CLI, 0);
2763	map = t3_read_reg(adap, A_SG_DATA_INTR);
2764
2765	if (unlikely(!map))	/* shared interrupt, most likely */
2766		return IRQ_NONE;
2767
2768	spin_lock(&q0->lock);
2769
2770	if (unlikely(map & F_ERRINTR))
2771		t3_slow_intr_handler(adap);
2772
2773	if (likely(map & 1))
2774		napi_schedule(&qs0->napi);
2775
2776	if (map & 2)
2777		napi_schedule(&adap->sge.qs[1].napi);
2778
2779	spin_unlock(&q0->lock);
2780	return IRQ_HANDLED;
2781}
2782
2783/**
2784 *	t3_intr_handler - select the top-level interrupt handler
2785 *	@adap: the adapter
2786 *	@polling: whether using NAPI to service response queues
2787 *
2788 *	Selects the top-level interrupt handler based on the type of interrupts
2789 *	(MSI-X, MSI, or legacy) and whether NAPI will be used to service the
2790 *	response queues.
2791 */
2792irq_handler_t t3_intr_handler(struct adapter *adap, int polling)
2793{
2794	if (adap->flags & USING_MSIX)
2795		return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
2796	if (adap->flags & USING_MSI)
2797		return polling ? t3_intr_msi_napi : t3_intr_msi;
2798	if (adap->params.rev > 0)
2799		return polling ? t3b_intr_napi : t3b_intr;
2800	return t3_intr;
2801}
2802
2803#define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
2804		    F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
2805		    V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
2806		    F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
2807		    F_HIRCQPARITYERROR)
2808#define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
2809#define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
2810		      F_RSPQDISABLED)
2811
2812/**
2813 *	t3_sge_err_intr_handler - SGE async event interrupt handler
2814 *	@adapter: the adapter
2815 *
2816 *	Interrupt handler for SGE asynchronous (non-data) events.
2817 */
2818void t3_sge_err_intr_handler(struct adapter *adapter)
2819{
2820	unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE) &
2821				 ~F_FLEMPTY;
2822
2823	if (status & SGE_PARERR)
2824		CH_ALERT(adapter, "SGE parity error (0x%x)\n",
2825			 status & SGE_PARERR);
2826	if (status & SGE_FRAMINGERR)
2827		CH_ALERT(adapter, "SGE framing error (0x%x)\n",
2828			 status & SGE_FRAMINGERR);
2829
2830	if (status & F_RSPQCREDITOVERFOW)
2831		CH_ALERT(adapter, "SGE response queue credit overflow\n");
2832
2833	if (status & F_RSPQDISABLED) {
2834		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
2835
2836		CH_ALERT(adapter,
2837			 "packet delivered to disabled response queue "
2838			 "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff);
2839	}
2840
2841	if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR))
2842		queue_work(cxgb3_wq, &adapter->db_drop_task);
2843
2844	if (status & (F_HIPRIORITYDBFULL | F_LOPRIORITYDBFULL))
2845		queue_work(cxgb3_wq, &adapter->db_full_task);
2846
2847	if (status & (F_HIPRIORITYDBEMPTY | F_LOPRIORITYDBEMPTY))
2848		queue_work(cxgb3_wq, &adapter->db_empty_task);
2849
2850	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
2851	if (status &  SGE_FATALERR)
2852		t3_fatal_err(adapter);
2853}
2854
2855/**
2856 *	sge_timer_tx - perform periodic maintenance of an SGE qset
2857 *	@data: the SGE queue set to maintain
2858 *
2859 *	Runs periodically from a timer to perform maintenance of an SGE queue
2860 *	set.  It performs two tasks:
2861 *
2862 *	Cleans up any completed Tx descriptors that may still be pending.
2863 *	Normal descriptor cleanup happens when new packets are added to a Tx
2864 *	queue so this timer is relatively infrequent and does any cleanup only
2865 *	if the Tx queue has not seen any new packets in a while.  We make a
2866 *	best effort attempt to reclaim descriptors, in that we don't wait
2867 *	around if we cannot get a queue's lock (which most likely is because
2868 *	someone else is queueing new packets and so will also handle the clean
2869 *	up).  Since control queues use immediate data exclusively we don't
2870 *	bother cleaning them up here.
2871 *
2872 */
2873static void sge_timer_tx(unsigned long data)
2874{
2875	struct sge_qset *qs = (struct sge_qset *)data;
2876	struct port_info *pi = netdev_priv(qs->netdev);
2877	struct adapter *adap = pi->adapter;
2878	unsigned int tbd[SGE_TXQ_PER_SET] = {0, 0};
2879	unsigned long next_period;
2880
2881	if (__netif_tx_trylock(qs->tx_q)) {
2882		tbd[TXQ_ETH] = reclaim_completed_tx(adap, &qs->txq[TXQ_ETH],
2883						    TX_RECLAIM_TIMER_CHUNK);
2884		__netif_tx_unlock(qs->tx_q);
2885	}
2886
2887	if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) {
2888		tbd[TXQ_OFLD] = reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD],
2889						     TX_RECLAIM_TIMER_CHUNK);
2890		spin_unlock(&qs->txq[TXQ_OFLD].lock);
2891	}
2892
2893	next_period = TX_RECLAIM_PERIOD >>
2894		      (max(tbd[TXQ_ETH], tbd[TXQ_OFLD]) /
2895		       TX_RECLAIM_TIMER_CHUNK);
2896	mod_timer(&qs->tx_reclaim_timer, jiffies + next_period);
2897}
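
/*
 * Worked example of the back-off above: if either Tx queue yields 128
 * reclaimed descriptors against a TX_RECLAIM_TIMER_CHUNK of 64, next_period
 * becomes TX_RECLAIM_PERIOD >> 2, i.e. the timer reruns four times sooner.
 */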
2898
2899/*
2900 *	sge_timer_rx - perform periodic maintenance of an SGE qset
2901 *	@data: the SGE queue set to maintain
2902 *
2903 *	a) Replenishes Rx queues that have run out due to memory shortage.
2904 *	Normally new Rx buffers are added when existing ones are consumed but
2905 *	when out of memory a queue can become empty.  We try to add only a few
2906 *	buffers here, the queue will be replenished fully as these new buffers
2907 *	are used up if memory shortage has subsided.
2908 *
2909 *	b) Return coalesced response queue credits in case a response queue is
2910 *	starved.
2911 *
2912 */
2913static void sge_timer_rx(unsigned long data)
2914{
2915	spinlock_t *lock;
2916	struct sge_qset *qs = (struct sge_qset *)data;
2917	struct port_info *pi = netdev_priv(qs->netdev);
2918	struct adapter *adap = pi->adapter;
2919	u32 status;
2920
2921	lock = adap->params.rev > 0 ?
2922	       &qs->rspq.lock : &adap->sge.qs[0].rspq.lock;
2923
2924	if (!spin_trylock_irq(lock))
2925		goto out;
2926
2927	if (napi_is_scheduled(&qs->napi))
2928		goto unlock;
2929
2930	if (adap->params.rev < 4) {
2931		status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS);
2932
2933		if (status & (1 << qs->rspq.cntxt_id)) {
2934			qs->rspq.starved++;
2935			if (qs->rspq.credits) {
2936				qs->rspq.credits--;
2937				refill_rspq(adap, &qs->rspq, 1);
2938				qs->rspq.restarted++;
2939				t3_write_reg(adap, A_SG_RSPQ_FL_STATUS,
2940					     1 << qs->rspq.cntxt_id);
2941			}
2942		}
2943	}
2944
2945	if (qs->fl[0].credits < qs->fl[0].size)
2946		__refill_fl(adap, &qs->fl[0]);
2947	if (qs->fl[1].credits < qs->fl[1].size)
2948		__refill_fl(adap, &qs->fl[1]);
2949
2950unlock:
2951	spin_unlock_irq(lock);
2952out:
2953	mod_timer(&qs->rx_reclaim_timer, jiffies + RX_RECLAIM_PERIOD);
2954}
2955
2956/**
2957 *	t3_update_qset_coalesce - update coalescing settings for a queue set
2958 *	@qs: the SGE queue set
2959 *	@p: new queue set parameters
2960 *
2961 *	Update the coalescing settings for an SGE queue set.  Nothing is done
2962 *	if the queue set is not initialized yet.
2963 */
2964void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
2965{
2966	qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);/* can't be 0 */
2967	qs->rspq.polling = p->polling;
2968	qs->napi.poll = p->polling ? napi_rx_handler : ofld_poll;
2969}
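
/*
 * holdoff_tmr is kept in 0.1 us units (hence coalesce_usecs * 10), and the
 * max() with 1 keeps it non-zero since a zero holdoff is not allowed.
 */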
2970
2971/**
2972 *	t3_sge_alloc_qset - initialize an SGE queue set
2973 *	@adapter: the adapter
2974 *	@id: the queue set id
2975 *	@nports: how many Ethernet ports will be using this queue set
2976 *	@irq_vec_idx: the IRQ vector index for response queue interrupts
2977 *	@p: configuration parameters for this queue set
2978 *	@ntxq: number of Tx queues for the queue set
2979 *	@netdev: net device associated with this queue set
2980 *	@netdevq: net device TX queue associated with this queue set
2981 *
2982 *	Allocate resources and initialize an SGE queue set.  A queue set
2983 *	comprises a response queue, two Rx free-buffer queues, and up to 3
2984 *	Tx queues.  The Tx queues are assigned roles in the order Ethernet
2985 *	queue, offload queue, and control queue.
2986 */
2987int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2988		      int irq_vec_idx, const struct qset_params *p,
2989		      int ntxq, struct net_device *dev,
2990		      struct netdev_queue *netdevq)
2991{
2992	int i, avail, ret = -ENOMEM;
2993	struct sge_qset *q = &adapter->sge.qs[id];
2994
2995	init_qset_cntxt(q, id);
2996	setup_timer(&q->tx_reclaim_timer, sge_timer_tx, (unsigned long)q);
2997	setup_timer(&q->rx_reclaim_timer, sge_timer_rx, (unsigned long)q);
2998
2999	q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
3000				   sizeof(struct rx_desc),
3001				   sizeof(struct rx_sw_desc),
3002				   &q->fl[0].phys_addr, &q->fl[0].sdesc);
3003	if (!q->fl[0].desc)
3004		goto err;
3005
3006	q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size,
3007				   sizeof(struct rx_desc),
3008				   sizeof(struct rx_sw_desc),
3009				   &q->fl[1].phys_addr, &q->fl[1].sdesc);
3010	if (!q->fl[1].desc)
3011		goto err;
3012
3013	q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size,
3014				  sizeof(struct rsp_desc), 0,
3015				  &q->rspq.phys_addr, NULL);
3016	if (!q->rspq.desc)
3017		goto err;
3018
3019	for (i = 0; i < ntxq; ++i) {
3020		/*
3021		 * The control queue always uses immediate data so does not
3022		 * need to keep track of any sk_buffs.
3023		 */
3024		size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
3025
3026		q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i],
3027					    sizeof(struct tx_desc), sz,
3028					    &q->txq[i].phys_addr,
3029					    &q->txq[i].sdesc);
3030		if (!q->txq[i].desc)
3031			goto err;
3032
3033		q->txq[i].gen = 1;
3034		q->txq[i].size = p->txq_size[i];
3035		spin_lock_init(&q->txq[i].lock);
3036		skb_queue_head_init(&q->txq[i].sendq);
3037	}
3038
3039	tasklet_init(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq,
3040		     (unsigned long)q);
3041	tasklet_init(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq,
3042		     (unsigned long)q);
3043
3044	q->fl[0].gen = q->fl[1].gen = 1;
3045	q->fl[0].size = p->fl_size;
3046	q->fl[1].size = p->jumbo_size;
3047
3048	q->rspq.gen = 1;
3049	q->rspq.size = p->rspq_size;
3050	spin_lock_init(&q->rspq.lock);
3051	skb_queue_head_init(&q->rspq.rx_queue);
3052
3053	q->txq[TXQ_ETH].stop_thres = nports *
3054	    flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3);
3055
3056#if FL0_PG_CHUNK_SIZE > 0
3057	q->fl[0].buf_size = FL0_PG_CHUNK_SIZE;
3058#else
3059	q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);
3060#endif
3061#if FL1_PG_CHUNK_SIZE > 0
3062	q->fl[1].buf_size = FL1_PG_CHUNK_SIZE;
3063#else
3064	q->fl[1].buf_size = is_offload(adapter) ?
3065		(16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
3066		MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt);
3067#endif
3068
3069	q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0;
3070	q->fl[1].use_pages = FL1_PG_CHUNK_SIZE > 0;
3071	q->fl[0].order = FL0_PG_ORDER;
3072	q->fl[1].order = FL1_PG_ORDER;
3073	q->fl[0].alloc_size = FL0_PG_ALLOC_SIZE;
3074	q->fl[1].alloc_size = FL1_PG_ALLOC_SIZE;
3075
3076	spin_lock_irq(&adapter->sge.reg_lock);
3077
3078	/* FL threshold comparison uses < */
3079	ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
3080				   q->rspq.phys_addr, q->rspq.size,
3081				   q->fl[0].buf_size - SGE_PG_RSVD, 1, 0);
3082	if (ret)
3083		goto err_unlock;
3084
3085	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
3086		ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
3087					  q->fl[i].phys_addr, q->fl[i].size,
3088					  q->fl[i].buf_size - SGE_PG_RSVD,
3089					  p->cong_thres, 1, 0);
3090		if (ret)
3091			goto err_unlock;
3092	}
3093
3094	ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
3095				 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
3096				 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
3097				 1, 0);
3098	if (ret)
3099		goto err_unlock;
3100
3101	if (ntxq > 1) {
3102		ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id,
3103					 USE_GTS, SGE_CNTXT_OFLD, id,
3104					 q->txq[TXQ_OFLD].phys_addr,
3105					 q->txq[TXQ_OFLD].size, 0, 1, 0);
3106		if (ret)
3107			goto err_unlock;
3108	}
3109
3110	if (ntxq > 2) {
3111		ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0,
3112					 SGE_CNTXT_CTRL, id,
3113					 q->txq[TXQ_CTRL].phys_addr,
3114					 q->txq[TXQ_CTRL].size,
3115					 q->txq[TXQ_CTRL].token, 1, 0);
3116		if (ret)
3117			goto err_unlock;
3118	}
3119
3120	spin_unlock_irq(&adapter->sge.reg_lock);
3121
3122	q->adap = adapter;
3123	q->netdev = dev;
3124	q->tx_q = netdevq;
3125	t3_update_qset_coalesce(q, p);
3126
3127	avail = refill_fl(adapter, &q->fl[0], q->fl[0].size,
3128			  GFP_KERNEL | __GFP_COMP);
3129	if (!avail) {
3130		CH_ALERT(adapter, "free list queue 0 initialization failed\n");
3131		goto err;
3132	}
3133	if (avail < q->fl[0].size)
3134		CH_WARN(adapter, "free list queue 0 enabled with %d credits\n",
3135			avail);
3136
3137	avail = refill_fl(adapter, &q->fl[1], q->fl[1].size,
3138			  GFP_KERNEL | __GFP_COMP);
3139	if (avail < q->fl[1].size)
3140		CH_WARN(adapter, "free list queue 1 enabled with %d credits\n",
3141			avail);
3142	refill_rspq(adapter, &q->rspq, q->rspq.size - 1);
3143
3144	t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
3145		     V_NEWTIMER(q->rspq.holdoff_tmr));
3146
3147	return 0;
3148
3149err_unlock:
3150	spin_unlock_irq(&adapter->sge.reg_lock);
3151err:
3152	t3_free_qset(adapter, q);
3153	return ret;
3154}
3155
3156/**
3157 *	t3_start_sge_timers - start SGE timer callbacks
3158 *	@adap: the adapter
3159 *
3160 *	Starts each SGE queue set's timer callback
3161 */
3162void t3_start_sge_timers(struct adapter *adap)
3163{
3164	int i;
3165
3166	for (i = 0; i < SGE_QSETS; ++i) {
3167		struct sge_qset *q = &adap->sge.qs[i];
3168
3169		if (q->tx_reclaim_timer.function)
3170			mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
3171
3172		if (q->rx_reclaim_timer.function)
3173			mod_timer(&q->rx_reclaim_timer, jiffies + RX_RECLAIM_PERIOD);
3174	}
3175}
3176
3177/**
3178 *	t3_stop_sge_timers - stop SGE timer callbacks
3179 *	@adap: the adapter
3180 *
3181 *	Stops each SGE queue set's timer callback
3182 */
3183void t3_stop_sge_timers(struct adapter *adap)
3184{
3185	int i;
3186
3187	for (i = 0; i < SGE_QSETS; ++i) {
3188		struct sge_qset *q = &adap->sge.qs[i];
3189
3190		if (q->tx_reclaim_timer.function)
3191			del_timer_sync(&q->tx_reclaim_timer);
3192		if (q->rx_reclaim_timer.function)
3193			del_timer_sync(&q->rx_reclaim_timer);
3194	}
3195}
3196
3197/**
3198 *	t3_free_sge_resources - free SGE resources
3199 *	@adap: the adapter
3200 *
3201 *	Frees resources used by the SGE queue sets.
3202 */
3203void t3_free_sge_resources(struct adapter *adap)
3204{
3205	int i;
3206
3207	for (i = 0; i < SGE_QSETS; ++i)
3208		t3_free_qset(adap, &adap->sge.qs[i]);
3209}
3210
3211/**
3212 *	t3_sge_start - enable SGE
3213 *	@adap: the adapter
3214 *
3215 *	Enables the SGE for DMAs.  This is the last step in starting packet
3216 *	transfers.
3217 */
3218void t3_sge_start(struct adapter *adap)
3219{
3220	t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
3221}
3222
3223/**
3224 *	t3_sge_stop - disable SGE operation
3225 *	@adap: the adapter
3226 *
3227 *	Disables the DMA engine.  This can be called in emergencies (e.g.,
3228 *	from error interrupts) or from normal process context.  In the latter
3229 *	case it also disables any pending queue restart tasklets.  Note that
3230 *	if it is called in interrupt context it cannot disable the restart
3231 *	tasklets as it cannot wait, however the tasklets will have no effect
3232 *	since the doorbells are disabled and the driver will call this again
3233 *	later from process context, at which time the tasklets will be stopped
3234 *	if they are still running.
3235 */
3236void t3_sge_stop(struct adapter *adap)
3237{
3238	t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0);
3239	if (!in_interrupt()) {
3240		int i;
3241
3242		for (i = 0; i < SGE_QSETS; ++i) {
3243			struct sge_qset *qs = &adap->sge.qs[i];
3244
3245			tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk);
3246			tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk);
3247		}
3248	}
3249}
3250
3251/**
3252 *	t3_sge_init - initialize SGE
3253 *	@adap: the adapter
3254 *	@p: the SGE parameters
3255 *
3256 *	Performs SGE initialization needed every time after a chip reset.
3257 *	We do not initialize any of the queue sets here, instead the driver
3258 *	top-level must request those individually.  We also do not enable DMA
3259 *	here, that should be done after the queues have been set up.
3260 */
3261void t3_sge_init(struct adapter *adap, struct sge_params *p)
3262{
3263	unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12);
3264
3265	ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
3266	    F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
3267	    V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
3268	    V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
3269#if SGE_NUM_GENBITS == 1
3270	ctrl |= F_EGRGENCTRL;
3271#endif
3272	if (adap->params.rev > 0) {
3273		if (!(adap->flags & (USING_MSIX | USING_MSI)))
3274			ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
3275	}
3276	t3_write_reg(adap, A_SG_CONTROL, ctrl);
3277	t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
3278		     V_LORCQDRBTHRSH(512));
3279	t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
3280	t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
3281		     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
3282	t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
3283		     adap->params.rev < T3_REV_C ? 1000 : 500);
3284	t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
3285	t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
3286	t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
3287	t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
3288	t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
3289}
3290
3291/**
3292 *	t3_sge_prep - one-time SGE initialization
3293 *	@adap: the associated adapter
3294 *	@p: SGE parameters
3295 *
3296 *	Performs one-time initialization of SGE SW state.  Includes determining
3297 *	defaults for the assorted SGE parameters, which admins can change until
3298 *	they are used to initialize the SGE.
3299 */
3300void t3_sge_prep(struct adapter *adap, struct sge_params *p)
3301{
3302	int i;
3303
3304	p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) -
3305	    SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
3306
3307	for (i = 0; i < SGE_QSETS; ++i) {
3308		struct qset_params *q = p->qset + i;
3309
3310		q->polling = adap->params.rev > 0;
3311		q->coalesce_usecs = 5;
3312		q->rspq_size = 1024;
3313		q->fl_size = 1024;
3314		q->jumbo_size = 512;
3315		q->txq_size[TXQ_ETH] = 1024;
3316		q->txq_size[TXQ_OFLD] = 1024;
3317		q->txq_size[TXQ_CTRL] = 256;
3318		q->cong_thres = 0;
3319	}
3320
3321	spin_lock_init(&adap->sge.reg_lock);
3322}
3323
3324/**
3325 *	t3_get_desc - dump an SGE descriptor for debugging purposes
3326 *	@qs: the queue set
3327 *	@qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3328 *	@idx: the descriptor index in the queue
3329 *	@data: where to dump the descriptor contents
3330 *
3331 *	Dumps the contents of a HW descriptor of an SGE queue.  Returns the
3332 *	size of the descriptor.
3333 */
3334int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3335		unsigned char *data)
3336{
3337	if (qnum >= 6)
3338		return -EINVAL;
3339
3340	if (qnum < 3) {
3341		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3342			return -EINVAL;
3343		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3344		return sizeof(struct tx_desc);
3345	}
3346
3347	if (qnum == 3) {
3348		if (!qs->rspq.desc || idx >= qs->rspq.size)
3349			return -EINVAL;
3350		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3351		return sizeof(struct rsp_desc);
3352	}
3353
3354	qnum -= 4;
3355	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3356		return -EINVAL;
3357	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3358	return sizeof(struct rx_desc);
3359}
3360