• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /asuswrt-rt-n18u-9.0.0.4.380.2695/release/src-rt-6.x.4708/linux/linux-2.6.36/drivers/net/cxgb4/
1/*
2 * This file is part of the Chelsio T4 Ethernet driver for Linux.
3 *
4 * Copyright (c) 2003-2010 Chelsio Communications, Inc. All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses.  You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
11 *
12 *     Redistribution and use in source and binary forms, with or
13 *     without modification, are permitted provided that the following
14 *     conditions are met:
15 *
16 *      - Redistributions of source code must retain the above
17 *        copyright notice, this list of conditions and the following
18 *        disclaimer.
19 *
20 *      - Redistributions in binary form must reproduce the above
21 *        copyright notice, this list of conditions and the following
22 *        disclaimer in the documentation and/or other materials
23 *        provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 */
34
35#include <linux/skbuff.h>
36#include <linux/netdevice.h>
37#include <linux/etherdevice.h>
38#include <linux/if_vlan.h>
39#include <linux/ip.h>
40#include <linux/dma-mapping.h>
41#include <linux/jiffies.h>
42#include <net/ipv6.h>
43#include <net/tcp.h>
44#include "cxgb4.h"
45#include "t4_regs.h"
46#include "t4_msg.h"
47#include "t4fw_api.h"
48
/*
 * Rx buffer size.  We use largish buffers if possible but settle for single
 * pages under memory shortage.  FL_PG_ORDER is the page allocation order used
 * for the large buffers; it is 0 (no separate large buffers) once PAGE_SHIFT
 * reaches 16.
 */
#if PAGE_SHIFT >= 16
# define FL_PG_ORDER 0
#else
# define FL_PG_ORDER (16 - PAGE_SHIFT)
#endif

/* RX_PULL_LEN should be <= RX_COPY_THRES */
#define RX_COPY_THRES    256
#define RX_PULL_LEN      128

/*
 * Main body length for sk_buffs used for Rx Ethernet packets with fragments.
 * Should be >= RX_PULL_LEN but possibly bigger to give pskb_may_pull some room.
 */
#define RX_PKT_SKB_LEN   512

/* Ethernet header padding prepended to RX_PKTs */
#define RX_PKT_PAD 2

/*
 * Max number of Tx descriptors we clean up at a time.  Should be modest as
 * freeing skbs isn't cheap and it happens while holding locks.  We just need
 * to free packets faster than they arrive, we eventually catch up and keep
 * the amortized cost reasonable.  Must be >= 2 * TXQ_STOP_THRES.
 * This is the cap applied by reclaim_completed_tx().
 */
#define MAX_TX_RECLAIM 16

/*
 * Max number of Rx buffers we replenish at a time.  Again keep this modest,
 * allocating buffers isn't cheap either.  This is the cap applied by
 * __refill_fl().
 */
#define MAX_RX_REFILL 16U

/*
 * Period of the Rx queue check timer.  This timer is infrequent as it has
 * something to do only when the system experiences severe memory shortage.
 */
#define RX_QCHECK_PERIOD (HZ / 2)

/*
 * Period of the Tx queue check timer.
 */
#define TX_QCHECK_PERIOD (HZ / 2)

/*
 * Max number of Tx descriptors to be reclaimed by the Tx timer.
 */
#define MAX_TIMER_TX_RECLAIM 100

/*
 * Timer index used when backing off due to memory shortage.
 */
#define NOMEM_TMR_IDX (SGE_NTIMERS - 1)

/*
 * An FL with <= FL_STARVE_THRES buffers is starving and a periodic timer will
 * attempt to refill it.  See fl_starving() for the exact test.
 */
#define FL_STARVE_THRES 4

/*
 * Suspend an Ethernet Tx queue with fewer available descriptors than this.
 * This is the same as calc_tx_descs() for a TSO packet with
 * nr_frags == MAX_SKB_FRAGS.
 */
#define ETHTXQ_STOP_THRES \
	(1 + DIV_ROUND_UP((3 * MAX_SKB_FRAGS) / 2 + (MAX_SKB_FRAGS & 1), 8))

/*
 * Suspension threshold for non-Ethernet Tx queues.  We require enough room
 * for a full sized WR.  The value is expressed in Tx descriptors.
 */
#define TXQ_STOP_THRES (SGE_MAX_WR_LEN / sizeof(struct tx_desc))

/*
 * Max Tx descriptor space we allow for an Ethernet packet to be inlined
 * into a WR.  is_eth_imm() subtracts the CPL header size from this to get
 * the largest packet that may be inlined.
 */
#define MAX_IMM_TX_PKT_LEN 128

/*
 * Max size of a WR sent through a control Tx queue.  is_imm() uses this as
 * the upper bound on the length of a control packet.
 */
#define MAX_CTRL_WR_LEN SGE_MAX_WR_LEN
137
/* Sizes derived from the CPU cache line size (L1_CACHE_BYTES). */
enum {
	/* packet alignment in FL buffers: one cache line, but never below 32 */
	FL_ALIGN = L1_CACHE_BYTES < 32 ? 32 : L1_CACHE_BYTES,
	/* egress status entry size: 128 if cache lines exceed 64 bytes, else 64 */
	STAT_LEN = L1_CACHE_BYTES > 64 ? 128 : 64
};
144
/*
 * Software state kept alongside each Tx descriptor.  t4_eth_xmit() fills in
 * the entry of a packet's last descriptor; free_tx_desc() treats a non-NULL
 * @skb as "an SGL is present" and unmaps/frees accordingly.
 */
struct tx_sw_desc {                /* SW state per Tx descriptor */
	struct sk_buff *skb;       /* packet to free on reclaim, or NULL */
	struct ulptx_sgl *sgl;     /* location of the packet's SGL in the ring */
};
149
/*
 * Software state kept alongside each free-list (Rx buffer) descriptor.
 * The low bits of @dma_addr carry the RX_LARGE_BUF / RX_UNMAPPED_BUF flags;
 * use get_buf_addr() to obtain the bare bus address.
 */
struct rx_sw_desc {                /* SW state per Rx descriptor */
	struct page *page;         /* page backing the buffer */
	dma_addr_t dma_addr;       /* bus address of the buffer plus flag bits */
};
154
/*
 * The low bits of rx_sw_desc.dma_addr have special meaning.  They are masked
 * off by get_buf_addr() and tested by is_buf_mapped() and get_buf_size().
 */
enum {
	RX_LARGE_BUF    = 1 << 0, /* buffer is larger than PAGE_SIZE */
	RX_UNMAPPED_BUF = 1 << 1, /* buffer is not mapped */
};
162
163static inline dma_addr_t get_buf_addr(const struct rx_sw_desc *d)
164{
165	return d->dma_addr & ~(dma_addr_t)(RX_LARGE_BUF | RX_UNMAPPED_BUF);
166}
167
168static inline bool is_buf_mapped(const struct rx_sw_desc *d)
169{
170	return !(d->dma_addr & RX_UNMAPPED_BUF);
171}
172
173/**
174 *	txq_avail - return the number of available slots in a Tx queue
175 *	@q: the Tx queue
176 *
177 *	Returns the number of descriptors in a Tx queue available to write new
178 *	packets.
179 */
180static inline unsigned int txq_avail(const struct sge_txq *q)
181{
182	return q->size - 1 - q->in_use;
183}
184
185/**
186 *	fl_cap - return the capacity of a free-buffer list
187 *	@fl: the FL
188 *
189 *	Returns the capacity of a free-buffer list.  The capacity is less than
190 *	the size because one descriptor needs to be left unpopulated, otherwise
191 *	HW will think the FL is empty.
192 */
193static inline unsigned int fl_cap(const struct sge_fl *fl)
194{
195	return fl->size - 8;   /* 1 descriptor = 8 buffers */
196}
197
198static inline bool fl_starving(const struct sge_fl *fl)
199{
200	return fl->avail - fl->pend_cred <= FL_STARVE_THRES;
201}
202
203static int map_skb(struct device *dev, const struct sk_buff *skb,
204		   dma_addr_t *addr)
205{
206	const skb_frag_t *fp, *end;
207	const struct skb_shared_info *si;
208
209	*addr = dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);
210	if (dma_mapping_error(dev, *addr))
211		goto out_err;
212
213	si = skb_shinfo(skb);
214	end = &si->frags[si->nr_frags];
215
216	for (fp = si->frags; fp < end; fp++) {
217		*++addr = dma_map_page(dev, fp->page, fp->page_offset, fp->size,
218				       DMA_TO_DEVICE);
219		if (dma_mapping_error(dev, *addr))
220			goto unwind;
221	}
222	return 0;
223
224unwind:
225	while (fp-- > si->frags)
226		dma_unmap_page(dev, *--addr, fp->size, DMA_TO_DEVICE);
227
228	dma_unmap_single(dev, addr[-1], skb_headlen(skb), DMA_TO_DEVICE);
229out_err:
230	return -ENOMEM;
231}
232
233#ifdef CONFIG_NEED_DMA_MAP_STATE
234static void unmap_skb(struct device *dev, const struct sk_buff *skb,
235		      const dma_addr_t *addr)
236{
237	const skb_frag_t *fp, *end;
238	const struct skb_shared_info *si;
239
240	dma_unmap_single(dev, *addr++, skb_headlen(skb), DMA_TO_DEVICE);
241
242	si = skb_shinfo(skb);
243	end = &si->frags[si->nr_frags];
244	for (fp = si->frags; fp < end; fp++)
245		dma_unmap_page(dev, *addr++, fp->size, DMA_TO_DEVICE);
246}
247
248/**
249 *	deferred_unmap_destructor - unmap a packet when it is freed
250 *	@skb: the packet
251 *
252 *	This is the packet destructor used for Tx packets that need to remain
253 *	mapped until they are freed rather than until their Tx descriptors are
254 *	freed.
255 */
256static void deferred_unmap_destructor(struct sk_buff *skb)
257{
258	unmap_skb(skb->dev->dev.parent, skb, (dma_addr_t *)skb->head);
259}
260#endif
261
/*
 * Unmap the DMA buffers described by a packet's scatter/gather list.
 *
 * @dev: the device the buffers were mapped for
 * @skb: the packet the SGL belongs to
 * @sgl: the SGL as it was written into the Tx ring
 * @q: the Tx queue holding the SGL
 *
 * The addresses and lengths are read back from the SGL in the ring.  The
 * SGL may wrap from the end of the descriptor area (q->stat) back to the
 * start of the ring (q->desc), possibly in the middle of an address/length
 * pair, which is what the case analysis below deals with.
 */
static void unmap_sgl(struct device *dev, const struct sk_buff *skb,
		      const struct ulptx_sgl *sgl, const struct sge_txq *q)
{
	const struct ulptx_sge_pair *p;
	unsigned int nfrags = skb_shinfo(skb)->nr_frags;

	/*
	 * The first SGL entry is the linear data if there is any, otherwise
	 * it is the first page fragment.
	 */
	if (likely(skb_headlen(skb)))
		dma_unmap_single(dev, be64_to_cpu(sgl->addr0), ntohl(sgl->len0),
				 DMA_TO_DEVICE);
	else {
		dma_unmap_page(dev, be64_to_cpu(sgl->addr0), ntohl(sgl->len0),
			       DMA_TO_DEVICE);
		nfrags--;
	}

	/*
	 * the complexity below is because of the possibility of a wrap-around
	 * in the middle of an SGL
	 */
	for (p = sgl->sge; nfrags >= 2; nfrags -= 2) {
		if (likely((u8 *)(p + 1) <= (u8 *)q->stat)) {
			/* the whole pair lies before the end of the ring */
unmap:			dma_unmap_page(dev, be64_to_cpu(p->addr[0]),
				       ntohl(p->len[0]), DMA_TO_DEVICE);
			dma_unmap_page(dev, be64_to_cpu(p->addr[1]),
				       ntohl(p->len[1]), DMA_TO_DEVICE);
			p++;
		} else if ((u8 *)p == (u8 *)q->stat) {
			/* the pair starts exactly at the wrap point */
			p = (const struct ulptx_sge_pair *)q->desc;
			goto unmap;
		} else if ((u8 *)p + 8 == (u8 *)q->stat) {
			/* lengths before the wrap, both addresses after it */
			const __be64 *addr = (const __be64 *)q->desc;

			dma_unmap_page(dev, be64_to_cpu(addr[0]),
				       ntohl(p->len[0]), DMA_TO_DEVICE);
			dma_unmap_page(dev, be64_to_cpu(addr[1]),
				       ntohl(p->len[1]), DMA_TO_DEVICE);
			p = (const struct ulptx_sge_pair *)&addr[2];
		} else {
			/* only the second address is after the wrap */
			const __be64 *addr = (const __be64 *)q->desc;

			dma_unmap_page(dev, be64_to_cpu(p->addr[0]),
				       ntohl(p->len[0]), DMA_TO_DEVICE);
			dma_unmap_page(dev, be64_to_cpu(addr[0]),
				       ntohl(p->len[1]), DMA_TO_DEVICE);
			p = (const struct ulptx_sge_pair *)&addr[1];
		}
	}
	if (nfrags) {
		/* odd fragment left over: a half-filled final pair */
		__be64 addr;

		if ((u8 *)p == (u8 *)q->stat)
			p = (const struct ulptx_sge_pair *)q->desc;
		addr = (u8 *)p + 16 <= (u8 *)q->stat ? p->addr[0] :
						       *(const __be64 *)q->desc;
		dma_unmap_page(dev, be64_to_cpu(addr), ntohl(p->len[0]),
			       DMA_TO_DEVICE);
	}
}
320
321/**
322 *	free_tx_desc - reclaims Tx descriptors and their buffers
323 *	@adapter: the adapter
324 *	@q: the Tx queue to reclaim descriptors from
325 *	@n: the number of descriptors to reclaim
326 *	@unmap: whether the buffers should be unmapped for DMA
327 *
328 *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
329 *	Tx buffers.  Called with the Tx queue lock held.
330 */
331static void free_tx_desc(struct adapter *adap, struct sge_txq *q,
332			 unsigned int n, bool unmap)
333{
334	struct tx_sw_desc *d;
335	unsigned int cidx = q->cidx;
336	struct device *dev = adap->pdev_dev;
337
338	d = &q->sdesc[cidx];
339	while (n--) {
340		if (d->skb) {                       /* an SGL is present */
341			if (unmap)
342				unmap_sgl(dev, d->skb, d->sgl, q);
343			kfree_skb(d->skb);
344			d->skb = NULL;
345		}
346		++d;
347		if (++cidx == q->size) {
348			cidx = 0;
349			d = q->sdesc;
350		}
351	}
352	q->cidx = cidx;
353}
354
355/*
356 * Return the number of reclaimable descriptors in a Tx queue.
357 */
358static inline int reclaimable(const struct sge_txq *q)
359{
360	int hw_cidx = ntohs(q->stat->cidx);
361	hw_cidx -= q->cidx;
362	return hw_cidx < 0 ? hw_cidx + q->size : hw_cidx;
363}
364
365/**
366 *	reclaim_completed_tx - reclaims completed Tx descriptors
367 *	@adap: the adapter
368 *	@q: the Tx queue to reclaim completed descriptors from
369 *	@unmap: whether the buffers should be unmapped for DMA
370 *
371 *	Reclaims Tx descriptors that the SGE has indicated it has processed,
372 *	and frees the associated buffers if possible.  Called with the Tx
373 *	queue locked.
374 */
375static inline void reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
376					bool unmap)
377{
378	int avail = reclaimable(q);
379
380	if (avail) {
381		/*
382		 * Limit the amount of clean up work we do at a time to keep
383		 * the Tx lock hold time O(1).
384		 */
385		if (avail > MAX_TX_RECLAIM)
386			avail = MAX_TX_RECLAIM;
387
388		free_tx_desc(adap, q, avail, unmap);
389		q->in_use -= avail;
390	}
391}
392
393static inline int get_buf_size(const struct rx_sw_desc *d)
394{
395#if FL_PG_ORDER > 0
396	return (d->dma_addr & RX_LARGE_BUF) ? (PAGE_SIZE << FL_PG_ORDER) :
397					      PAGE_SIZE;
398#else
399	return PAGE_SIZE;
400#endif
401}
402
403/**
404 *	free_rx_bufs - free the Rx buffers on an SGE free list
405 *	@adap: the adapter
406 *	@q: the SGE free list to free buffers from
407 *	@n: how many buffers to free
408 *
409 *	Release the next @n buffers on an SGE free-buffer Rx queue.   The
410 *	buffers must be made inaccessible to HW before calling this function.
411 */
412static void free_rx_bufs(struct adapter *adap, struct sge_fl *q, int n)
413{
414	while (n--) {
415		struct rx_sw_desc *d = &q->sdesc[q->cidx];
416
417		if (is_buf_mapped(d))
418			dma_unmap_page(adap->pdev_dev, get_buf_addr(d),
419				       get_buf_size(d), PCI_DMA_FROMDEVICE);
420		put_page(d->page);
421		d->page = NULL;
422		if (++q->cidx == q->size)
423			q->cidx = 0;
424		q->avail--;
425	}
426}
427
428/**
429 *	unmap_rx_buf - unmap the current Rx buffer on an SGE free list
430 *	@adap: the adapter
431 *	@q: the SGE free list
432 *
433 *	Unmap the current buffer on an SGE free-buffer Rx queue.   The
434 *	buffer must be made inaccessible to HW before calling this function.
435 *
436 *	This is similar to @free_rx_bufs above but does not free the buffer.
437 *	Do note that the FL still loses any further access to the buffer.
438 */
439static void unmap_rx_buf(struct adapter *adap, struct sge_fl *q)
440{
441	struct rx_sw_desc *d = &q->sdesc[q->cidx];
442
443	if (is_buf_mapped(d))
444		dma_unmap_page(adap->pdev_dev, get_buf_addr(d),
445			       get_buf_size(d), PCI_DMA_FROMDEVICE);
446	d->page = NULL;
447	if (++q->cidx == q->size)
448		q->cidx = 0;
449	q->avail--;
450}
451
452static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
453{
454	if (q->pend_cred >= 8) {
455		wmb();
456		t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), DBPRIO |
457			     QID(q->cntxt_id) | PIDX(q->pend_cred / 8));
458		q->pend_cred &= 7;
459	}
460}
461
462static inline void set_rx_sw_desc(struct rx_sw_desc *sd, struct page *pg,
463				  dma_addr_t mapping)
464{
465	sd->page = pg;
466	sd->dma_addr = mapping;      /* includes size low bits */
467}
468
469/**
470 *	refill_fl - refill an SGE Rx buffer ring
471 *	@adap: the adapter
472 *	@q: the ring to refill
473 *	@n: the number of new buffers to allocate
474 *	@gfp: the gfp flags for the allocations
475 *
476 *	(Re)populate an SGE free-buffer queue with up to @n new packet buffers,
477 *	allocated with the supplied gfp flags.  The caller must assure that
478 *	@n does not exceed the queue's capacity.  If afterwards the queue is
479 *	found critically low mark it as starving in the bitmap of starving FLs.
480 *
481 *	Returns the number of buffers allocated.
482 */
483static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n,
484			      gfp_t gfp)
485{
486	struct page *pg;
487	dma_addr_t mapping;
488	unsigned int cred = q->avail;
489	__be64 *d = &q->desc[q->pidx];
490	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
491
492	gfp |= __GFP_NOWARN;         /* failures are expected */
493
494#if FL_PG_ORDER > 0
495	/*
496	 * Prefer large buffers
497	 */
498	while (n) {
499		pg = alloc_pages(gfp | __GFP_COMP, FL_PG_ORDER);
500		if (unlikely(!pg)) {
501			q->large_alloc_failed++;
502			break;       /* fall back to single pages */
503		}
504
505		mapping = dma_map_page(adap->pdev_dev, pg, 0,
506				       PAGE_SIZE << FL_PG_ORDER,
507				       PCI_DMA_FROMDEVICE);
508		if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) {
509			__free_pages(pg, FL_PG_ORDER);
510			goto out;   /* do not try small pages for this error */
511		}
512		mapping |= RX_LARGE_BUF;
513		*d++ = cpu_to_be64(mapping);
514
515		set_rx_sw_desc(sd, pg, mapping);
516		sd++;
517
518		q->avail++;
519		if (++q->pidx == q->size) {
520			q->pidx = 0;
521			sd = q->sdesc;
522			d = q->desc;
523		}
524		n--;
525	}
526#endif
527
528	while (n--) {
529		pg = __netdev_alloc_page(adap->port[0], gfp);
530		if (unlikely(!pg)) {
531			q->alloc_failed++;
532			break;
533		}
534
535		mapping = dma_map_page(adap->pdev_dev, pg, 0, PAGE_SIZE,
536				       PCI_DMA_FROMDEVICE);
537		if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) {
538			netdev_free_page(adap->port[0], pg);
539			goto out;
540		}
541		*d++ = cpu_to_be64(mapping);
542
543		set_rx_sw_desc(sd, pg, mapping);
544		sd++;
545
546		q->avail++;
547		if (++q->pidx == q->size) {
548			q->pidx = 0;
549			sd = q->sdesc;
550			d = q->desc;
551		}
552	}
553
554out:	cred = q->avail - cred;
555	q->pend_cred += cred;
556	ring_fl_db(adap, q);
557
558	if (unlikely(fl_starving(q))) {
559		smp_wmb();
560		set_bit(q->cntxt_id, adap->sge.starving_fl);
561	}
562
563	return cred;
564}
565
566static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
567{
568	refill_fl(adap, fl, min(MAX_RX_REFILL, fl_cap(fl) - fl->avail),
569		  GFP_ATOMIC);
570}
571
572/**
573 *	alloc_ring - allocate resources for an SGE descriptor ring
574 *	@dev: the PCI device's core device
575 *	@nelem: the number of descriptors
576 *	@elem_size: the size of each descriptor
577 *	@sw_size: the size of the SW state associated with each ring element
578 *	@phys: the physical address of the allocated ring
579 *	@metadata: address of the array holding the SW state for the ring
580 *	@stat_size: extra space in HW ring for status information
581 *
582 *	Allocates resources for an SGE descriptor ring, such as Tx queues,
583 *	free buffer lists, or response queues.  Each SGE ring requires
584 *	space for its HW descriptors plus, optionally, space for the SW state
585 *	associated with each HW entry (the metadata).  The function returns
586 *	three values: the virtual address for the HW ring (the return value
587 *	of the function), the bus address of the HW ring, and the address
588 *	of the SW ring.
589 */
590static void *alloc_ring(struct device *dev, size_t nelem, size_t elem_size,
591			size_t sw_size, dma_addr_t *phys, void *metadata,
592			size_t stat_size)
593{
594	size_t len = nelem * elem_size + stat_size;
595	void *s = NULL;
596	void *p = dma_alloc_coherent(dev, len, phys, GFP_KERNEL);
597
598	if (!p)
599		return NULL;
600	if (sw_size) {
601		s = kcalloc(nelem, sw_size, GFP_KERNEL);
602
603		if (!s) {
604			dma_free_coherent(dev, len, p, *phys);
605			return NULL;
606		}
607	}
608	if (metadata)
609		*(void **)metadata = s;
610	memset(p, 0, len);
611	return p;
612}
613
/**
 *	sgl_len - calculates the size of an SGL of the given capacity
 *	@n: the number of SGL entries
 *
 *	Returns the number of flits needed for a scatter/gather list with
 *	room for @n entries: 2 flits for the first entry, then 3 flits for
 *	every further pair of entries and 2 flits for an unpaired last one.
 *	@n is expected to be at least 1.
 */
static inline unsigned int sgl_len(unsigned int n)
{
	unsigned int rest = n - 1;      /* entries beyond the first */

	return 2 + (3 * rest) / 2 + (rest & 1);
}
626
627/**
628 *	flits_to_desc - returns the num of Tx descriptors for the given flits
629 *	@n: the number of flits
630 *
631 *	Returns the number of Tx descriptors needed for the supplied number
632 *	of flits.
633 */
634static inline unsigned int flits_to_desc(unsigned int n)
635{
636	BUG_ON(n > SGE_MAX_WR_LEN / 8);
637	return DIV_ROUND_UP(n, 8);
638}
639
640/**
641 *	is_eth_imm - can an Ethernet packet be sent as immediate data?
642 *	@skb: the packet
643 *
644 *	Returns whether an Ethernet packet is small enough to fit as
645 *	immediate data.
646 */
647static inline int is_eth_imm(const struct sk_buff *skb)
648{
649	return skb->len <= MAX_IMM_TX_PKT_LEN - sizeof(struct cpl_tx_pkt);
650}
651
652/**
653 *	calc_tx_flits - calculate the number of flits for a packet Tx WR
654 *	@skb: the packet
655 *
656 *	Returns the number of flits needed for a Tx WR for the given Ethernet
657 *	packet, including the needed WR and CPL headers.
658 */
659static inline unsigned int calc_tx_flits(const struct sk_buff *skb)
660{
661	unsigned int flits;
662
663	if (is_eth_imm(skb))
664		return DIV_ROUND_UP(skb->len + sizeof(struct cpl_tx_pkt), 8);
665
666	flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 4;
667	if (skb_shinfo(skb)->gso_size)
668		flits += 2;
669	return flits;
670}
671
/**
 *	calc_tx_descs - calculate the number of Tx descriptors for a packet
 *	@skb: the packet
 *
 *	Returns the number of Tx descriptors the given Ethernet packet will
 *	occupy, WR and CPL headers included.
 */
static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
{
	unsigned int flits = calc_tx_flits(skb);

	return flits_to_desc(flits);
}
683
/**
 *	write_sgl - populate a scatter/gather list for a packet
 *	@skb: the packet
 *	@q: the Tx queue we are writing into
 *	@sgl: starting location for writing the SGL
 *	@end: points right after the end of the SGL
 *	@start: start offset into skb main-body data to include in the SGL
 *	@addr: the list of bus addresses for the SGL elements
 *
 *	Generates a gather list for the buffers that make up a packet.
 *	The caller must provide adequate space for the SGL that will be written.
 *	The SGL includes all of the packet's page fragments and the data in its
 *	main body except for the first @start bytes.  @sgl must be 16-byte
 *	aligned and within a Tx descriptor with available space.  @end points
 *	right after the end of the SGL but does not account for any potential
 *	wrap around, i.e., @end > @sgl.
 *
 *	@addr[0] is the bus address of the main body and @addr[1..] are the
 *	addresses of the page fragments, in order.
 */
static void write_sgl(const struct sk_buff *skb, struct sge_txq *q,
		      struct ulptx_sgl *sgl, u64 *end, unsigned int start,
		      const dma_addr_t *addr)
{
	unsigned int i, len;
	struct ulptx_sge_pair *to;
	const struct skb_shared_info *si = skb_shinfo(skb);
	unsigned int nfrags = si->nr_frags;
	struct ulptx_sge_pair buf[MAX_SKB_FRAGS / 2 + 1];

	/*
	 * The first SGL entry is what remains of the main body after @start,
	 * or the first page fragment if nothing remains.
	 */
	len = skb_headlen(skb) - start;
	if (likely(len)) {
		sgl->len0 = htonl(len);
		sgl->addr0 = cpu_to_be64(addr[0] + start);
		nfrags++;
	} else {
		sgl->len0 = htonl(si->frags[0].size);
		sgl->addr0 = cpu_to_be64(addr[1]);
	}

	/* nfrags is now the total number of SGL entries */
	sgl->cmd_nsge = htonl(ULPTX_CMD(ULP_TX_SC_DSGL) | ULPTX_NSGE(nfrags));
	if (likely(--nfrags == 0))
		return;
	/*
	 * Most of the complexity below deals with the possibility we hit the
	 * end of the queue in the middle of writing the SGL.  For this case
	 * only we create the SGL in a temporary buffer and then copy it.
	 */
	to = (u8 *)end > (u8 *)q->stat ? buf : sgl->sge;

	/*
	 * i indexes the next page fragment to add: 0 if the first entry was
	 * the main body, 1 if it was fragment 0.  Fragment i has bus address
	 * addr[i + 1].
	 */
	for (i = (nfrags != si->nr_frags); nfrags >= 2; nfrags -= 2, to++) {
		to->len[0] = cpu_to_be32(si->frags[i].size);
		to->len[1] = cpu_to_be32(si->frags[++i].size);
		to->addr[0] = cpu_to_be64(addr[i]);
		to->addr[1] = cpu_to_be64(addr[++i]);
	}
	if (nfrags) {
		/* odd entry left over: half-filled pair with a 0 length */
		to->len[0] = cpu_to_be32(si->frags[i].size);
		to->len[1] = cpu_to_be32(0);
		to->addr[0] = cpu_to_be64(addr[i + 1]);
	}
	if (unlikely((u8 *)end > (u8 *)q->stat)) {
		/*
		 * The SGL was built in buf: copy the part that fits before
		 * the end of the ring, then the rest to the ring's start.
		 */
		unsigned int part0 = (u8 *)q->stat - (u8 *)sgl->sge, part1;

		if (likely(part0))
			memcpy(sgl->sge, buf, part0);
		part1 = (u8 *)end - (u8 *)q->stat;
		memcpy(q->desc, (u8 *)buf + part0, part1);
		end = (void *)q->desc + part1;
	}
	if ((uintptr_t)end & 8)           /* 0-pad to multiple of 16 */
		*(u64 *)end = 0;
}
754
755/**
756 *	ring_tx_db - check and potentially ring a Tx queue's doorbell
757 *	@adap: the adapter
758 *	@q: the Tx queue
759 *	@n: number of new descriptors to give to HW
760 *
761 *	Ring the doorbel for a Tx queue.
762 */
763static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
764{
765	wmb();            /* write descriptors before telling HW */
766	t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL),
767		     QID(q->cntxt_id) | PIDX(n));
768}
769
770/**
771 *	inline_tx_skb - inline a packet's data into Tx descriptors
772 *	@skb: the packet
773 *	@q: the Tx queue where the packet will be inlined
774 *	@pos: starting position in the Tx queue where to inline the packet
775 *
776 *	Inline a packet's contents directly into Tx descriptors, starting at
777 *	the given position within the Tx DMA ring.
778 *	Most of the complexity of this operation is dealing with wrap arounds
779 *	in the middle of the packet we want to inline.
780 */
781static void inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q,
782			  void *pos)
783{
784	u64 *p;
785	int left = (void *)q->stat - pos;
786
787	if (likely(skb->len <= left)) {
788		if (likely(!skb->data_len))
789			skb_copy_from_linear_data(skb, pos, skb->len);
790		else
791			skb_copy_bits(skb, 0, pos, skb->len);
792		pos += skb->len;
793	} else {
794		skb_copy_bits(skb, 0, pos, left);
795		skb_copy_bits(skb, left, q->desc, skb->len - left);
796		pos = (void *)q->desc + (skb->len - left);
797	}
798
799	/* 0-pad to multiple of 16 */
800	p = PTR_ALIGN(pos, 8);
801	if ((uintptr_t)p & 8)
802		*p = 0;
803}
804
805/*
806 * Figure out what HW csum a packet wants and return the appropriate control
807 * bits.
808 */
809static u64 hwcsum(const struct sk_buff *skb)
810{
811	int csum_type;
812	const struct iphdr *iph = ip_hdr(skb);
813
814	if (iph->version == 4) {
815		if (iph->protocol == IPPROTO_TCP)
816			csum_type = TX_CSUM_TCPIP;
817		else if (iph->protocol == IPPROTO_UDP)
818			csum_type = TX_CSUM_UDPIP;
819		else {
820nocsum:			/*
821			 * unknown protocol, disable HW csum
822			 * and hope a bad packet is detected
823			 */
824			return TXPKT_L4CSUM_DIS;
825		}
826	} else {
827		/*
828		 * this doesn't work with extension headers
829		 */
830		const struct ipv6hdr *ip6h = (const struct ipv6hdr *)iph;
831
832		if (ip6h->nexthdr == IPPROTO_TCP)
833			csum_type = TX_CSUM_TCPIP6;
834		else if (ip6h->nexthdr == IPPROTO_UDP)
835			csum_type = TX_CSUM_UDPIP6;
836		else
837			goto nocsum;
838	}
839
840	if (likely(csum_type >= TX_CSUM_TCPIP))
841		return TXPKT_CSUM_TYPE(csum_type) |
842			TXPKT_IPHDR_LEN(skb_network_header_len(skb)) |
843			TXPKT_ETHHDR_LEN(skb_network_offset(skb) - ETH_HLEN);
844	else {
845		int start = skb_transport_offset(skb);
846
847		return TXPKT_CSUM_TYPE(csum_type) | TXPKT_CSUM_START(start) |
848			TXPKT_CSUM_LOC(start + skb->csum_offset);
849	}
850}
851
/*
 * Stop the net device Tx queue behind an Ethernet SGE Tx queue and count the
 * stop in the SGE queue's statistics.
 */
static void eth_txq_stop(struct sge_eth_txq *q)
{
	netif_tx_stop_queue(q->txq);
	q->q.stops++;
}
857
858static inline void txq_advance(struct sge_txq *q, unsigned int n)
859{
860	q->in_use += n;
861	q->pidx += n;
862	if (q->pidx >= q->size)
863		q->pidx -= q->size;
864}
865
/**
 *	t4_eth_xmit - add a packet to an Ethernet Tx queue
 *	@skb: the packet
 *	@dev: the egress net device
 *
 *	Add a packet to an SGE Ethernet Tx queue.  Runs with softirqs disabled.
 *
 *	Returns NETDEV_TX_BUSY if the Tx ring has no room for the packet
 *	(the queue is stopped in that case) and NETDEV_TX_OK otherwise,
 *	including when the packet is dropped for being too short or because
 *	it could not be DMA mapped.
 */
netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
{
	u32 wr_mid;
	u64 cntrl, *end;
	int qidx, credits;
	unsigned int flits, ndesc;
	struct adapter *adap;
	struct sge_eth_txq *q;
	const struct port_info *pi;
	struct fw_eth_tx_pkt_wr *wr;
	struct cpl_tx_pkt_core *cpl;
	const struct skb_shared_info *ssi;
	dma_addr_t addr[MAX_SKB_FRAGS + 1];

	/*
	 * The chip min packet length is 10 octets but play safe and reject
	 * anything shorter than an Ethernet header.
	 */
	if (unlikely(skb->len < ETH_HLEN)) {
out_free:	dev_kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	pi = netdev_priv(dev);
	adap = pi->adapter;
	qidx = skb_get_queue_mapping(skb);
	q = &adap->sge.ethtxq[qidx + pi->first_qset];

	/* free descriptors HW has finished with before checking for room */
	reclaim_completed_tx(adap, &q->q, true);

	flits = calc_tx_flits(skb);
	ndesc = flits_to_desc(flits);
	/* descriptors that would remain available after this packet */
	credits = txq_avail(&q->q) - ndesc;

	if (unlikely(credits < 0)) {
		eth_txq_stop(q);
		dev_err(adap->pdev_dev,
			"%s: Tx ring %u full while queue awake!\n",
			dev->name, qidx);
		return NETDEV_TX_BUSY;
	}

	/* packets sent as immediate data are copied, not DMA mapped */
	if (!is_eth_imm(skb) &&
	    unlikely(map_skb(adap->pdev_dev, skb, addr) < 0)) {
		q->mapping_err++;
		goto out_free;
	}

	wr_mid = FW_WR_LEN16(DIV_ROUND_UP(flits, 2));
	if (unlikely(credits < ETHTXQ_STOP_THRES)) {
		/*
		 * Running low on descriptors: stop the queue and set the
		 * EQUEQ/EQUIQ flags in this WR.
		 * NOTE(review): presumably these request an egress queue
		 * update so the queue can be restarted -- confirm against
		 * the firmware interface.
		 */
		eth_txq_stop(q);
		wr_mid |= FW_WR_EQUEQ | FW_WR_EQUIQ;
	}

	wr = (void *)&q->q.desc[q->q.pidx];
	wr->equiq_to_len16 = htonl(wr_mid);
	wr->r3 = cpu_to_be64(0);
	end = (u64 *)wr + flits;

	ssi = skb_shinfo(skb);
	if (ssi->gso_size) {
		/* GSO packet: an LSO CPL precedes the Tx packet CPL */
		struct cpl_tx_pkt_lso *lso = (void *)wr;
		bool v6 = (ssi->gso_type & SKB_GSO_TCPV6) != 0;
		int l3hdr_len = skb_network_header_len(skb);
		int eth_xtra_len = skb_network_offset(skb) - ETH_HLEN;

		wr->op_immdlen = htonl(FW_WR_OP(FW_ETH_TX_PKT_WR) |
				       FW_WR_IMMDLEN(sizeof(*lso)));
		lso->c.lso_ctrl = htonl(LSO_OPCODE(CPL_TX_PKT_LSO) |
					LSO_FIRST_SLICE | LSO_LAST_SLICE |
					LSO_IPV6(v6) |
					LSO_ETHHDR_LEN(eth_xtra_len / 4) |
					LSO_IPHDR_LEN(l3hdr_len / 4) |
					LSO_TCPHDR_LEN(tcp_hdr(skb)->doff));
		lso->c.ipid_ofst = htons(0);
		lso->c.mss = htons(ssi->gso_size);
		lso->c.seqno_offset = htonl(0);
		lso->c.len = htonl(skb->len);
		cpl = (void *)(lso + 1);
		cntrl = TXPKT_CSUM_TYPE(v6 ? TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) |
			TXPKT_IPHDR_LEN(l3hdr_len) |
			TXPKT_ETHHDR_LEN(eth_xtra_len);
		q->tso++;
		q->tx_cso += ssi->gso_segs;
	} else {
		int len;

		/* immediate length covers the CPL plus any inlined packet */
		len = is_eth_imm(skb) ? skb->len + sizeof(*cpl) : sizeof(*cpl);
		wr->op_immdlen = htonl(FW_WR_OP(FW_ETH_TX_PKT_WR) |
				       FW_WR_IMMDLEN(len));
		cpl = (void *)(wr + 1);
		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			cntrl = hwcsum(skb) | TXPKT_IPCSUM_DIS;
			q->tx_cso++;
		} else
			cntrl = TXPKT_L4CSUM_DIS | TXPKT_IPCSUM_DIS;
	}

	if (vlan_tx_tag_present(skb)) {
		q->vlan_ins++;
		cntrl |= TXPKT_VLAN_VLD | TXPKT_VLAN(vlan_tx_tag_get(skb));
	}

	cpl->ctrl0 = htonl(TXPKT_OPCODE(CPL_TX_PKT_XT) |
			   TXPKT_INTF(pi->tx_chan) | TXPKT_PF(0));
	cpl->pack = htons(0);
	cpl->len = htons(skb->len);
	cpl->ctrl1 = cpu_to_be64(cntrl);

	if (is_eth_imm(skb)) {
		/* the data now lives in the ring, the skb can go right away */
		inline_tx_skb(skb, &q->q, cpl + 1);
		dev_kfree_skb(skb);
	} else {
		int last_desc;

		write_sgl(skb, &q->q, (struct ulptx_sgl *)(cpl + 1), end, 0,
			  addr);
		skb_orphan(skb);

		/*
		 * Record the skb and its SGL on the packet's last descriptor
		 * so that free_tx_desc() can unmap and free it on reclaim.
		 */
		last_desc = q->q.pidx + ndesc - 1;
		if (last_desc >= q->q.size)
			last_desc -= q->q.size;
		q->q.sdesc[last_desc].skb = skb;
		q->q.sdesc[last_desc].sgl = (struct ulptx_sgl *)(cpl + 1);
	}

	txq_advance(&q->q, ndesc);

	ring_tx_db(adap, &q->q, ndesc);
	return NETDEV_TX_OK;
}
1004
1005/**
1006 *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1007 *	@q: the SGE control Tx queue
1008 *
1009 *	This is a variant of reclaim_completed_tx() that is used for Tx queues
1010 *	that send only immediate data (presently just the control queues) and
1011 *	thus do not have any sk_buffs to release.
1012 */
1013static inline void reclaim_completed_tx_imm(struct sge_txq *q)
1014{
1015	int hw_cidx = ntohs(q->stat->cidx);
1016	int reclaim = hw_cidx - q->cidx;
1017
1018	if (reclaim < 0)
1019		reclaim += q->size;
1020
1021	q->in_use -= reclaim;
1022	q->cidx = hw_cidx;
1023}
1024
1025/**
1026 *	is_imm - check whether a packet can be sent as immediate data
1027 *	@skb: the packet
1028 *
1029 *	Returns true if a packet can be sent as a WR with immediate data.
1030 */
1031static inline int is_imm(const struct sk_buff *skb)
1032{
1033	return skb->len <= MAX_CTRL_WR_LEN;
1034}
1035
1036/**
1037 *	ctrlq_check_stop - check if a control queue is full and should stop
1038 *	@q: the queue
1039 *	@wr: most recent WR written to the queue
1040 *
1041 *	Check if a control queue has become full and should be stopped.
1042 *	We clean up control queue descriptors very lazily, only when we are out.
1043 *	If the queue is still full after reclaiming any completed descriptors
1044 *	we suspend it and have the last WR wake it up.
1045 */
1046static void ctrlq_check_stop(struct sge_ctrl_txq *q, struct fw_wr_hdr *wr)
1047{
1048	reclaim_completed_tx_imm(&q->q);
1049	if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES)) {
1050		wr->lo |= htonl(FW_WR_EQUEQ | FW_WR_EQUIQ);
1051		q->q.stops++;
1052		q->full = 1;
1053	}
1054}
1055
1056/**
1057 *	ctrl_xmit - send a packet through an SGE control Tx queue
1058 *	@q: the control queue
1059 *	@skb: the packet
1060 *
1061 *	Send a packet through an SGE control Tx queue.  Packets sent through
1062 *	a control queue must fit entirely as immediate data.
1063 */
1064static int ctrl_xmit(struct sge_ctrl_txq *q, struct sk_buff *skb)
1065{
1066	unsigned int ndesc;
1067	struct fw_wr_hdr *wr;
1068
1069	if (unlikely(!is_imm(skb))) {
1070		WARN_ON(1);
1071		dev_kfree_skb(skb);
1072		return NET_XMIT_DROP;
1073	}
1074
1075	ndesc = DIV_ROUND_UP(skb->len, sizeof(struct tx_desc));
1076	spin_lock(&q->sendq.lock);
1077
1078	if (unlikely(q->full)) {
1079		skb->priority = ndesc;                  /* save for restart */
1080		__skb_queue_tail(&q->sendq, skb);
1081		spin_unlock(&q->sendq.lock);
1082		return NET_XMIT_CN;
1083	}
1084
1085	wr = (struct fw_wr_hdr *)&q->q.desc[q->q.pidx];
1086	inline_tx_skb(skb, &q->q, wr);
1087
1088	txq_advance(&q->q, ndesc);
1089	if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES))
1090		ctrlq_check_stop(q, wr);
1091
1092	ring_tx_db(q->adap, &q->q, ndesc);
1093	spin_unlock(&q->sendq.lock);
1094
1095	kfree_skb(skb);
1096	return NET_XMIT_SUCCESS;
1097}
1098
1099/**
1100 *	restart_ctrlq - restart a suspended control queue
1101 *	@data: the control queue to restart
1102 *
1103 *	Resumes transmission on a suspended Tx control queue.
1104 */
1105static void restart_ctrlq(unsigned long data)
1106{
1107	struct sk_buff *skb;
1108	unsigned int written = 0;
1109	struct sge_ctrl_txq *q = (struct sge_ctrl_txq *)data;
1110
1111	spin_lock(&q->sendq.lock);
1112	reclaim_completed_tx_imm(&q->q);
1113	BUG_ON(txq_avail(&q->q) < TXQ_STOP_THRES);  /* q should be empty */
1114
1115	while ((skb = __skb_dequeue(&q->sendq)) != NULL) {
1116		struct fw_wr_hdr *wr;
1117		unsigned int ndesc = skb->priority;     /* previously saved */
1118
1119		/*
1120		 * Write descriptors and free skbs outside the lock to limit
1121		 * wait times.  q->full is still set so new skbs will be queued.
1122		 */
1123		spin_unlock(&q->sendq.lock);
1124
1125		wr = (struct fw_wr_hdr *)&q->q.desc[q->q.pidx];
1126		inline_tx_skb(skb, &q->q, wr);
1127		kfree_skb(skb);
1128
1129		written += ndesc;
1130		txq_advance(&q->q, ndesc);
1131		if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES)) {
1132			unsigned long old = q->q.stops;
1133
1134			ctrlq_check_stop(q, wr);
1135			if (q->q.stops != old) {          /* suspended anew */
1136				spin_lock(&q->sendq.lock);
1137				goto ringdb;
1138			}
1139		}
1140		if (written > 16) {
1141			ring_tx_db(q->adap, &q->q, written);
1142			written = 0;
1143		}
1144		spin_lock(&q->sendq.lock);
1145	}
1146	q->full = 0;
1147ringdb: if (written)
1148		ring_tx_db(q->adap, &q->q, written);
1149	spin_unlock(&q->sendq.lock);
1150}
1151
1152/**
1153 *	t4_mgmt_tx - send a management message
1154 *	@adap: the adapter
1155 *	@skb: the packet containing the management message
1156 *
1157 *	Send a management message through control queue 0.
1158 */
1159int t4_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
1160{
1161	int ret;
1162
1163	local_bh_disable();
1164	ret = ctrl_xmit(&adap->sge.ctrlq[0], skb);
1165	local_bh_enable();
1166	return ret;
1167}
1168
1169/**
1170 *	is_ofld_imm - check whether a packet can be sent as immediate data
1171 *	@skb: the packet
1172 *
1173 *	Returns true if a packet can be sent as an offload WR with immediate
1174 *	data.  We currently use the same limit as for Ethernet packets.
1175 */
1176static inline int is_ofld_imm(const struct sk_buff *skb)
1177{
1178	return skb->len <= MAX_IMM_TX_PKT_LEN;
1179}
1180
1181/**
1182 *	calc_tx_flits_ofld - calculate # of flits for an offload packet
1183 *	@skb: the packet
1184 *
1185 *	Returns the number of flits needed for the given offload packet.
1186 *	These packets are already fully constructed and no additional headers
1187 *	will be added.
1188 */
1189static inline unsigned int calc_tx_flits_ofld(const struct sk_buff *skb)
1190{
1191	unsigned int flits, cnt;
1192
1193	if (is_ofld_imm(skb))
1194		return DIV_ROUND_UP(skb->len, 8);
1195
1196	flits = skb_transport_offset(skb) / 8U;   /* headers */
1197	cnt = skb_shinfo(skb)->nr_frags;
1198	if (skb->tail != skb->transport_header)
1199		cnt++;
1200	return flits + sgl_len(cnt);
1201}
1202
1203/**
1204 *	txq_stop_maperr - stop a Tx queue due to I/O MMU exhaustion
1206 *	@q: the queue to stop
1207 *
1208 *	Mark a Tx queue stopped due to I/O MMU exhaustion and resulting
1209 *	inability to map packets.  A periodic timer attempts to restart
1210 *	queues so marked.
1211 */
1212static void txq_stop_maperr(struct sge_ofld_txq *q)
1213{
1214	q->mapping_err++;
1215	q->q.stops++;
1216	set_bit(q->q.cntxt_id, q->adap->sge.txq_maperr);
1217}
1218
1219/**
1220 *	ofldtxq_stop - stop an offload Tx queue that has become full
1221 *	@q: the queue to stop
1222 *	@skb: the packet causing the queue to become full
1223 *
1224 *	Stops an offload Tx queue that has become full and modifies the packet
1225 *	being written to request a wakeup.
1226 */
1227static void ofldtxq_stop(struct sge_ofld_txq *q, struct sk_buff *skb)
1228{
1229	struct fw_wr_hdr *wr = (struct fw_wr_hdr *)skb->data;
1230
1231	wr->lo |= htonl(FW_WR_EQUEQ | FW_WR_EQUIQ);
1232	q->q.stops++;
1233	q->full = 1;
1234}
1235
1236/**
1237 *	service_ofldq - restart a suspended offload queue
1238 *	@q: the offload queue
1239 *
1240 *	Services an offload Tx queue by moving packets from its packet queue
1241 *	to the HW Tx ring.  The function starts and ends with the queue locked.
1242 */
1243static void service_ofldq(struct sge_ofld_txq *q)
1244{
1245	u64 *pos;
1246	int credits;
1247	struct sk_buff *skb;
1248	unsigned int written = 0;
1249	unsigned int flits, ndesc;
1250
1251	while ((skb = skb_peek(&q->sendq)) != NULL && !q->full) {
1252		/*
1253		 * We drop the lock but leave skb on sendq, thus retaining
1254		 * exclusive access to the state of the queue.
1255		 */
1256		spin_unlock(&q->sendq.lock);
1257
1258		reclaim_completed_tx(q->adap, &q->q, false);
1259
1260		flits = skb->priority;                /* previously saved */
1261		ndesc = flits_to_desc(flits);
1262		credits = txq_avail(&q->q) - ndesc;
1263		BUG_ON(credits < 0);
1264		if (unlikely(credits < TXQ_STOP_THRES))
1265			ofldtxq_stop(q, skb);
1266
1267		pos = (u64 *)&q->q.desc[q->q.pidx];
1268		if (is_ofld_imm(skb))
1269			inline_tx_skb(skb, &q->q, pos);
1270		else if (map_skb(q->adap->pdev_dev, skb,
1271				 (dma_addr_t *)skb->head)) {
1272			txq_stop_maperr(q);
1273			spin_lock(&q->sendq.lock);
1274			break;
1275		} else {
1276			int last_desc, hdr_len = skb_transport_offset(skb);
1277
1278			memcpy(pos, skb->data, hdr_len);
1279			write_sgl(skb, &q->q, (void *)pos + hdr_len,
1280				  pos + flits, hdr_len,
1281				  (dma_addr_t *)skb->head);
1282#ifdef CONFIG_NEED_DMA_MAP_STATE
1283			skb->dev = q->adap->port[0];
1284			skb->destructor = deferred_unmap_destructor;
1285#endif
1286			last_desc = q->q.pidx + ndesc - 1;
1287			if (last_desc >= q->q.size)
1288				last_desc -= q->q.size;
1289			q->q.sdesc[last_desc].skb = skb;
1290		}
1291
1292		txq_advance(&q->q, ndesc);
1293		written += ndesc;
1294		if (unlikely(written > 32)) {
1295			ring_tx_db(q->adap, &q->q, written);
1296			written = 0;
1297		}
1298
1299		spin_lock(&q->sendq.lock);
1300		__skb_unlink(skb, &q->sendq);
1301		if (is_ofld_imm(skb))
1302			kfree_skb(skb);
1303	}
1304	if (likely(written))
1305		ring_tx_db(q->adap, &q->q, written);
1306}
1307
1308/**
1309 *	ofld_xmit - send a packet through an offload queue
1310 *	@q: the Tx offload queue
1311 *	@skb: the packet
1312 *
1313 *	Send an offload packet through an SGE offload queue.
1314 */
1315static int ofld_xmit(struct sge_ofld_txq *q, struct sk_buff *skb)
1316{
1317	skb->priority = calc_tx_flits_ofld(skb);       /* save for restart */
1318	spin_lock(&q->sendq.lock);
1319	__skb_queue_tail(&q->sendq, skb);
1320	if (q->sendq.qlen == 1)
1321		service_ofldq(q);
1322	spin_unlock(&q->sendq.lock);
1323	return NET_XMIT_SUCCESS;
1324}
1325
1326/**
1327 *	restart_ofldq - restart a suspended offload queue
1328 *	@data: the offload queue to restart
1329 *
1330 *	Resumes transmission on a suspended Tx offload queue.
1331 */
1332static void restart_ofldq(unsigned long data)
1333{
1334	struct sge_ofld_txq *q = (struct sge_ofld_txq *)data;
1335
1336	spin_lock(&q->sendq.lock);
1337	q->full = 0;            /* the queue actually is completely empty now */
1338	service_ofldq(q);
1339	spin_unlock(&q->sendq.lock);
1340}
1341
1342/**
1343 *	skb_txq - return the Tx queue an offload packet should use
1344 *	@skb: the packet
1345 *
1346 *	Returns the Tx queue an offload packet should use as indicated by bits
1347 *	1-15 in the packet's queue_mapping.
1348 */
1349static inline unsigned int skb_txq(const struct sk_buff *skb)
1350{
1351	return skb->queue_mapping >> 1;
1352}
1353
1354/**
1355 *	is_ctrl_pkt - return whether an offload packet is a control packet
1356 *	@skb: the packet
1357 *
1358 *	Returns whether an offload packet should use an OFLD or a CTRL
1359 *	Tx queue as indicated by bit 0 in the packet's queue_mapping.
1360 */
1361static inline unsigned int is_ctrl_pkt(const struct sk_buff *skb)
1362{
1363	return skb->queue_mapping & 1;
1364}
1365
1366static inline int ofld_send(struct adapter *adap, struct sk_buff *skb)
1367{
1368	unsigned int idx = skb_txq(skb);
1369
1370	if (unlikely(is_ctrl_pkt(skb)))
1371		return ctrl_xmit(&adap->sge.ctrlq[idx], skb);
1372	return ofld_xmit(&adap->sge.ofldtxq[idx], skb);
1373}
1374
1375/**
1376 *	t4_ofld_send - send an offload packet
1377 *	@adap: the adapter
1378 *	@skb: the packet
1379 *
1380 *	Sends an offload packet.  We use the packet queue_mapping to select the
1381 *	appropriate Tx queue as follows: bit 0 indicates whether the packet
1382 *	should be sent as regular or control, bits 1-15 select the queue.
1383 */
int t4_ofld_send(struct adapter *adap, struct sk_buff *skb)
{
	int rc;

	/* bottom halves stay disabled for the duration of the send */
	local_bh_disable();
	rc = ofld_send(adap, skb);
	local_bh_enable();

	return rc;
}
1393
1394/**
1395 *	cxgb4_ofld_send - send an offload packet
1396 *	@dev: the net device
1397 *	@skb: the packet
1398 *
1399 *	Sends an offload packet.  This is an exported version of @t4_ofld_send,
1400 *	intended for ULDs.
1401 */
int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb)
{
	/* resolve the adapter behind @dev and hand over to t4_ofld_send() */
	struct adapter *adap = netdev2adap(dev);

	return t4_ofld_send(adap, skb);
}
EXPORT_SYMBOL(cxgb4_ofld_send);
1407
1408static inline void copy_frags(struct skb_shared_info *ssi,
1409			      const struct pkt_gl *gl, unsigned int offset)
1410{
1411	unsigned int n;
1412
1413	/* usually there's just one frag */
1414	ssi->frags[0].page = gl->frags[0].page;
1415	ssi->frags[0].page_offset = gl->frags[0].page_offset + offset;
1416	ssi->frags[0].size = gl->frags[0].size - offset;
1417	ssi->nr_frags = gl->nfrags;
1418	n = gl->nfrags - 1;
1419	if (n)
1420		memcpy(&ssi->frags[1], &gl->frags[1], n * sizeof(skb_frag_t));
1421
1422	/* get a reference to the last page, we don't own it */
1423	get_page(gl->frags[n].page);
1424}
1425
1426/**
1427 *	cxgb4_pktgl_to_skb - build an sk_buff from a packet gather list
1428 *	@gl: the gather list
1429 *	@skb_len: size of sk_buff main body if it carries fragments
1430 *	@pull_len: amount of data to move to the sk_buff's main body
1431 *
1432 *	Builds an sk_buff from the given packet gather list.  Returns the
1433 *	sk_buff or %NULL if sk_buff allocation failed.
1434 */
1435struct sk_buff *cxgb4_pktgl_to_skb(const struct pkt_gl *gl,
1436				   unsigned int skb_len, unsigned int pull_len)
1437{
1438	struct sk_buff *skb;
1439
1440	/*
1441	 * Below we rely on RX_COPY_THRES being less than the smallest Rx buffer
1442	 * size, which is expected since buffers are at least PAGE_SIZEd.
1443	 * In this case packets up to RX_COPY_THRES have only one fragment.
1444	 */
1445	if (gl->tot_len <= RX_COPY_THRES) {
1446		skb = dev_alloc_skb(gl->tot_len);
1447		if (unlikely(!skb))
1448			goto out;
1449		__skb_put(skb, gl->tot_len);
1450		skb_copy_to_linear_data(skb, gl->va, gl->tot_len);
1451	} else {
1452		skb = dev_alloc_skb(skb_len);
1453		if (unlikely(!skb))
1454			goto out;
1455		__skb_put(skb, pull_len);
1456		skb_copy_to_linear_data(skb, gl->va, pull_len);
1457
1458		copy_frags(skb_shinfo(skb), gl, pull_len);
1459		skb->len = gl->tot_len;
1460		skb->data_len = skb->len - pull_len;
1461		skb->truesize += skb->data_len;
1462	}
1463out:	return skb;
1464}
1465EXPORT_SYMBOL(cxgb4_pktgl_to_skb);
1466
1467/**
1468 *	t4_pktgl_free - free a packet gather list
1469 *	@gl: the gather list
1470 *
1471 *	Releases the pages of a packet gather list.  We do not own the last
1472 *	page on the list and do not free it.
1473 */
1474static void t4_pktgl_free(const struct pkt_gl *gl)
1475{
1476	int n;
1477	const skb_frag_t *p;
1478
1479	for (p = gl->frags, n = gl->nfrags - 1; n--; p++)
1480		put_page(p->page);
1481}
1482
1483/*
1484 * Process an MPS trace packet.  Give it an unused protocol number so it won't
1485 * be delivered to anyone and send it to the stack for capture.
1486 */
1487static noinline int handle_trace_pkt(struct adapter *adap,
1488				     const struct pkt_gl *gl)
1489{
1490	struct sk_buff *skb;
1491	struct cpl_trace_pkt *p;
1492
1493	skb = cxgb4_pktgl_to_skb(gl, RX_PULL_LEN, RX_PULL_LEN);
1494	if (unlikely(!skb)) {
1495		t4_pktgl_free(gl);
1496		return 0;
1497	}
1498
1499	p = (struct cpl_trace_pkt *)skb->data;
1500	__skb_pull(skb, sizeof(*p));
1501	skb_reset_mac_header(skb);
1502	skb->protocol = htons(0xffff);
1503	skb->dev = adap->port[0];
1504	netif_receive_skb(skb);
1505	return 0;
1506}
1507
1508static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl,
1509		   const struct cpl_rx_pkt *pkt)
1510{
1511	int ret;
1512	struct sk_buff *skb;
1513
1514	skb = napi_get_frags(&rxq->rspq.napi);
1515	if (unlikely(!skb)) {
1516		t4_pktgl_free(gl);
1517		rxq->stats.rx_drops++;
1518		return;
1519	}
1520
1521	copy_frags(skb_shinfo(skb), gl, RX_PKT_PAD);
1522	skb->len = gl->tot_len - RX_PKT_PAD;
1523	skb->data_len = skb->len;
1524	skb->truesize += skb->data_len;
1525	skb->ip_summed = CHECKSUM_UNNECESSARY;
1526	skb_record_rx_queue(skb, rxq->rspq.idx);
1527	if (rxq->rspq.netdev->features & NETIF_F_RXHASH)
1528		skb->rxhash = (__force u32)pkt->rsshdr.hash_val;
1529
1530	if (unlikely(pkt->vlan_ex)) {
1531		struct port_info *pi = netdev_priv(rxq->rspq.netdev);
1532		struct vlan_group *grp = pi->vlan_grp;
1533
1534		rxq->stats.vlan_ex++;
1535		if (likely(grp)) {
1536			ret = vlan_gro_frags(&rxq->rspq.napi, grp,
1537					     ntohs(pkt->vlan));
1538			goto stats;
1539		}
1540	}
1541	ret = napi_gro_frags(&rxq->rspq.napi);
1542stats:	if (ret == GRO_HELD)
1543		rxq->stats.lro_pkts++;
1544	else if (ret == GRO_MERGED || ret == GRO_MERGED_FREE)
1545		rxq->stats.lro_merged++;
1546	rxq->stats.pkts++;
1547	rxq->stats.rx_cso++;
1548}
1549
1550/**
1551 *	t4_ethrx_handler - process an ingress ethernet packet
1552 *	@q: the response queue that received the packet
1553 *	@rsp: the response queue descriptor holding the RX_PKT message
1554 *	@si: the gather list of packet fragments
1555 *
1556 *	Process an ingress ethernet packet and deliver it to the stack.
1557 */
1558int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
1559		     const struct pkt_gl *si)
1560{
1561	bool csum_ok;
1562	struct sk_buff *skb;
1563	struct port_info *pi;
1564	const struct cpl_rx_pkt *pkt;
1565	struct sge_eth_rxq *rxq = container_of(q, struct sge_eth_rxq, rspq);
1566
1567	if (unlikely(*(u8 *)rsp == CPL_TRACE_PKT))
1568		return handle_trace_pkt(q->adap, si);
1569
1570	pkt = (const struct cpl_rx_pkt *)rsp;
1571	csum_ok = pkt->csum_calc && !pkt->err_vec;
1572	if ((pkt->l2info & htonl(RXF_TCP)) &&
1573	    (q->netdev->features & NETIF_F_GRO) && csum_ok && !pkt->ip_frag) {
1574		do_gro(rxq, si, pkt);
1575		return 0;
1576	}
1577
1578	skb = cxgb4_pktgl_to_skb(si, RX_PKT_SKB_LEN, RX_PULL_LEN);
1579	if (unlikely(!skb)) {
1580		t4_pktgl_free(si);
1581		rxq->stats.rx_drops++;
1582		return 0;
1583	}
1584
1585	__skb_pull(skb, RX_PKT_PAD);      /* remove ethernet header padding */
1586	skb->protocol = eth_type_trans(skb, q->netdev);
1587	skb_record_rx_queue(skb, q->idx);
1588	if (skb->dev->features & NETIF_F_RXHASH)
1589		skb->rxhash = (__force u32)pkt->rsshdr.hash_val;
1590
1591	pi = netdev_priv(skb->dev);
1592	rxq->stats.pkts++;
1593
1594	if (csum_ok && (pi->rx_offload & RX_CSO) &&
1595	    (pkt->l2info & htonl(RXF_UDP | RXF_TCP))) {
1596		if (!pkt->ip_frag) {
1597			skb->ip_summed = CHECKSUM_UNNECESSARY;
1598			rxq->stats.rx_cso++;
1599		} else if (pkt->l2info & htonl(RXF_IP)) {
1600			__sum16 c = (__force __sum16)pkt->csum;
1601			skb->csum = csum_unfold(c);
1602			skb->ip_summed = CHECKSUM_COMPLETE;
1603			rxq->stats.rx_cso++;
1604		}
1605	} else
1606		skb->ip_summed = CHECKSUM_NONE;
1607
1608	if (unlikely(pkt->vlan_ex)) {
1609		struct vlan_group *grp = pi->vlan_grp;
1610
1611		rxq->stats.vlan_ex++;
1612		if (likely(grp))
1613			vlan_hwaccel_receive_skb(skb, grp, ntohs(pkt->vlan));
1614		else
1615			dev_kfree_skb_any(skb);
1616	} else
1617		netif_receive_skb(skb);
1618
1619	return 0;
1620}
1621
1622/**
1623 *	restore_rx_bufs - put back a packet's Rx buffers
1624 *	@si: the packet gather list
1625 *	@q: the SGE free list
1626 *	@frags: number of FL buffers to restore
1627 *
1628 *	Puts back on an FL the Rx buffers associated with @si.  The buffers
1629 *	have already been unmapped and are left unmapped, we mark them so to
1630 *	prevent further unmapping attempts.
1631 *
1632 *	This function undoes a series of @unmap_rx_buf calls when we find out
1633 *	that the current packet can't be processed right away after all and we
1634 *	need to come back to it later.  This is a very rare event and there's
1635 *	no effort to make this particularly efficient.
1636 */
1637static void restore_rx_bufs(const struct pkt_gl *si, struct sge_fl *q,
1638			    int frags)
1639{
1640	struct rx_sw_desc *d;
1641
1642	while (frags--) {
1643		if (q->cidx == 0)
1644			q->cidx = q->size - 1;
1645		else
1646			q->cidx--;
1647		d = &q->sdesc[q->cidx];
1648		d->page = si->frags[frags].page;
1649		d->dma_addr |= RX_UNMAPPED_BUF;
1650		q->avail++;
1651	}
1652}
1653
1654/**
1655 *	is_new_response - check if a response is newly written
1656 *	@r: the response descriptor
1657 *	@q: the response queue
1658 *
1659 *	Returns true if a response descriptor contains a yet unprocessed
1660 *	response.
1661 */
1662static inline bool is_new_response(const struct rsp_ctrl *r,
1663				   const struct sge_rspq *q)
1664{
1665	return RSPD_GEN(r->type_gen) == q->gen;
1666}
1667
1668/**
1669 *	rspq_next - advance to the next entry in a response queue
1670 *	@q: the queue
1671 *
1672 *	Updates the state of a response queue to advance it to the next entry.
1673 */
1674static inline void rspq_next(struct sge_rspq *q)
1675{
1676	q->cur_desc = (void *)q->cur_desc + q->iqe_len;
1677	if (unlikely(++q->cidx == q->size)) {
1678		q->cidx = 0;
1679		q->gen ^= 1;
1680		q->cur_desc = q->desc;
1681	}
1682}
1683
1684/**
1685 *	process_responses - process responses from an SGE response queue
1686 *	@q: the ingress queue to process
1687 *	@budget: how many responses can be processed in this round
1688 *
1689 *	Process responses from an SGE response queue up to the supplied budget.
1690 *	Responses include received packets as well as control messages from FW
1691 *	or HW.
1692 *
1693 *	Additionally choose the interrupt holdoff time for the next interrupt
1694 *	on this queue.  If the system is under memory shortage use a fairly
1695 *	long delay to help recovery.
1696 */
1697static int process_responses(struct sge_rspq *q, int budget)
1698{
1699	int ret, rsp_type;
1700	int budget_left = budget;
1701	const struct rsp_ctrl *rc;
1702	struct sge_eth_rxq *rxq = container_of(q, struct sge_eth_rxq, rspq);
1703
1704	while (likely(budget_left)) {
1705		rc = (void *)q->cur_desc + (q->iqe_len - sizeof(*rc));
1706		if (!is_new_response(rc, q))
1707			break;
1708
1709		rmb();
1710		rsp_type = RSPD_TYPE(rc->type_gen);
1711		if (likely(rsp_type == RSP_TYPE_FLBUF)) {
1712			skb_frag_t *fp;
1713			struct pkt_gl si;
1714			const struct rx_sw_desc *rsd;
1715			u32 len = ntohl(rc->pldbuflen_qid), bufsz, frags;
1716
1717			if (len & RSPD_NEWBUF) {
1718				if (likely(q->offset > 0)) {
1719					free_rx_bufs(q->adap, &rxq->fl, 1);
1720					q->offset = 0;
1721				}
1722				len = RSPD_LEN(len);
1723			}
1724			si.tot_len = len;
1725
1726			/* gather packet fragments */
1727			for (frags = 0, fp = si.frags; ; frags++, fp++) {
1728				rsd = &rxq->fl.sdesc[rxq->fl.cidx];
1729				bufsz = get_buf_size(rsd);
1730				fp->page = rsd->page;
1731				fp->page_offset = q->offset;
1732				fp->size = min(bufsz, len);
1733				len -= fp->size;
1734				if (!len)
1735					break;
1736				unmap_rx_buf(q->adap, &rxq->fl);
1737			}
1738
1739			/*
1740			 * Last buffer remains mapped so explicitly make it
1741			 * coherent for CPU access.
1742			 */
1743			dma_sync_single_for_cpu(q->adap->pdev_dev,
1744						get_buf_addr(rsd),
1745						fp->size, DMA_FROM_DEVICE);
1746
1747			si.va = page_address(si.frags[0].page) +
1748				si.frags[0].page_offset;
1749			prefetch(si.va);
1750
1751			si.nfrags = frags + 1;
1752			ret = q->handler(q, q->cur_desc, &si);
1753			if (likely(ret == 0))
1754				q->offset += ALIGN(fp->size, FL_ALIGN);
1755			else
1756				restore_rx_bufs(&si, &rxq->fl, frags);
1757		} else if (likely(rsp_type == RSP_TYPE_CPL)) {
1758			ret = q->handler(q, q->cur_desc, NULL);
1759		} else {
1760			ret = q->handler(q, (const __be64 *)rc, CXGB4_MSG_AN);
1761		}
1762
1763		if (unlikely(ret)) {
1764			/* couldn't process descriptor, back off for recovery */
1765			q->next_intr_params = QINTR_TIMER_IDX(NOMEM_TMR_IDX);
1766			break;
1767		}
1768
1769		rspq_next(q);
1770		budget_left--;
1771	}
1772
1773	if (q->offset >= 0 && rxq->fl.size - rxq->fl.avail >= 16)
1774		__refill_fl(q->adap, &rxq->fl);
1775	return budget - budget_left;
1776}
1777
1778/**
1779 *	napi_rx_handler - the NAPI handler for Rx processing
1780 *	@napi: the napi instance
1781 *	@budget: how many packets we can process in this round
1782 *
1783 *	Handler for new data events when using NAPI.  This does not need any
1784 *	locking or protection from interrupts as data interrupts are off at
1785 *	this point and other adapter interrupts do not interfere (the latter
1786 *	is not a concern at all with MSI-X as non-data interrupts then have
1787 *	a separate handler).
1788 */
1789static int napi_rx_handler(struct napi_struct *napi, int budget)
1790{
1791	unsigned int params;
1792	struct sge_rspq *q = container_of(napi, struct sge_rspq, napi);
1793	int work_done = process_responses(q, budget);
1794
1795	if (likely(work_done < budget)) {
1796		napi_complete(napi);
1797		params = q->next_intr_params;
1798		q->next_intr_params = q->intr_params;
1799	} else
1800		params = QINTR_TIMER_IDX(7);
1801
1802	t4_write_reg(q->adap, MYPF_REG(SGE_PF_GTS), CIDXINC(work_done) |
1803		     INGRESSQID((u32)q->cntxt_id) | SEINTARM(params));
1804	return work_done;
1805}
1806
1807/*
1808 * The MSI-X interrupt handler for an SGE response queue.
1809 */
1810irqreturn_t t4_sge_intr_msix(int irq, void *cookie)
1811{
1812	struct sge_rspq *q = cookie;
1813
1814	napi_schedule(&q->napi);
1815	return IRQ_HANDLED;
1816}
1817
1818/*
1819 * Process the indirect interrupt entries in the interrupt queue and kick off
1820 * NAPI for each queue that has generated an entry.
1821 */
1822static unsigned int process_intrq(struct adapter *adap)
1823{
1824	unsigned int credits;
1825	const struct rsp_ctrl *rc;
1826	struct sge_rspq *q = &adap->sge.intrq;
1827
1828	spin_lock(&adap->sge.intrq_lock);
1829	for (credits = 0; ; credits++) {
1830		rc = (void *)q->cur_desc + (q->iqe_len - sizeof(*rc));
1831		if (!is_new_response(rc, q))
1832			break;
1833
1834		rmb();
1835		if (RSPD_TYPE(rc->type_gen) == RSP_TYPE_INTR) {
1836			unsigned int qid = ntohl(rc->pldbuflen_qid);
1837
1838			napi_schedule(&adap->sge.ingr_map[qid]->napi);
1839		}
1840
1841		rspq_next(q);
1842	}
1843
1844	t4_write_reg(adap, MYPF_REG(SGE_PF_GTS), CIDXINC(credits) |
1845		     INGRESSQID(q->cntxt_id) | SEINTARM(q->intr_params));
1846	spin_unlock(&adap->sge.intrq_lock);
1847	return credits;
1848}
1849
1850/*
1851 * The MSI interrupt handler, which handles data events from SGE response queues
1852 * as well as error and other async events as they all use the same MSI vector.
1853 */
1854static irqreturn_t t4_intr_msi(int irq, void *cookie)
1855{
1856	struct adapter *adap = cookie;
1857
1858	t4_slow_intr_handler(adap);
1859	process_intrq(adap);
1860	return IRQ_HANDLED;
1861}
1862
1863/*
1864 * Interrupt handler for legacy INTx interrupts.
1865 * Handles data events from SGE response queues as well as error and other
1866 * async events as they all use the same interrupt line.
1867 */
1868static irqreturn_t t4_intr_intx(int irq, void *cookie)
1869{
1870	struct adapter *adap = cookie;
1871
1872	t4_write_reg(adap, MYPF_REG(PCIE_PF_CLI), 0);
1873	if (t4_slow_intr_handler(adap) | process_intrq(adap))
1874		return IRQ_HANDLED;
1875	return IRQ_NONE;             /* probably shared interrupt */
1876}
1877
1878/**
1879 *	t4_intr_handler - select the top-level interrupt handler
1880 *	@adap: the adapter
1881 *
1882 *	Selects the top-level interrupt handler based on the type of interrupts
1883 *	(MSI-X, MSI, or INTx).
1884 */
1885irq_handler_t t4_intr_handler(struct adapter *adap)
1886{
1887	if (adap->flags & USING_MSIX)
1888		return t4_sge_intr_msix;
1889	if (adap->flags & USING_MSI)
1890		return t4_intr_msi;
1891	return t4_intr_intx;
1892}
1893
1894static void sge_rx_timer_cb(unsigned long data)
1895{
1896	unsigned long m;
1897	unsigned int i, cnt[2];
1898	struct adapter *adap = (struct adapter *)data;
1899	struct sge *s = &adap->sge;
1900
1901	for (i = 0; i < ARRAY_SIZE(s->starving_fl); i++)
1902		for (m = s->starving_fl[i]; m; m &= m - 1) {
1903			struct sge_eth_rxq *rxq;
1904			unsigned int id = __ffs(m) + i * BITS_PER_LONG;
1905			struct sge_fl *fl = s->egr_map[id];
1906
1907			clear_bit(id, s->starving_fl);
1908			smp_mb__after_clear_bit();
1909
1910			if (fl_starving(fl)) {
1911				rxq = container_of(fl, struct sge_eth_rxq, fl);
1912				if (napi_reschedule(&rxq->rspq.napi))
1913					fl->starving++;
1914				else
1915					set_bit(id, s->starving_fl);
1916			}
1917		}
1918
1919	t4_write_reg(adap, SGE_DEBUG_INDEX, 13);
1920	cnt[0] = t4_read_reg(adap, SGE_DEBUG_DATA_HIGH);
1921	cnt[1] = t4_read_reg(adap, SGE_DEBUG_DATA_LOW);
1922
1923	for (i = 0; i < 2; i++)
1924		if (cnt[i] >= s->starve_thres) {
1925			if (s->idma_state[i] || cnt[i] == 0xffffffff)
1926				continue;
1927			s->idma_state[i] = 1;
1928			t4_write_reg(adap, SGE_DEBUG_INDEX, 11);
1929			m = t4_read_reg(adap, SGE_DEBUG_DATA_LOW) >> (i * 16);
1930			dev_warn(adap->pdev_dev,
1931				 "SGE idma%u starvation detected for "
1932				 "queue %lu\n", i, m & 0xffff);
1933		} else if (s->idma_state[i])
1934			s->idma_state[i] = 0;
1935
1936	mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD);
1937}
1938
1939static void sge_tx_timer_cb(unsigned long data)
1940{
1941	unsigned long m;
1942	unsigned int i, budget;
1943	struct adapter *adap = (struct adapter *)data;
1944	struct sge *s = &adap->sge;
1945
1946	for (i = 0; i < ARRAY_SIZE(s->txq_maperr); i++)
1947		for (m = s->txq_maperr[i]; m; m &= m - 1) {
1948			unsigned long id = __ffs(m) + i * BITS_PER_LONG;
1949			struct sge_ofld_txq *txq = s->egr_map[id];
1950
1951			clear_bit(id, s->txq_maperr);
1952			tasklet_schedule(&txq->qresume_tsk);
1953		}
1954
1955	budget = MAX_TIMER_TX_RECLAIM;
1956	i = s->ethtxq_rover;
1957	do {
1958		struct sge_eth_txq *q = &s->ethtxq[i];
1959
1960		if (q->q.in_use &&
1961		    time_after_eq(jiffies, q->txq->trans_start + HZ / 100) &&
1962		    __netif_tx_trylock(q->txq)) {
1963			int avail = reclaimable(&q->q);
1964
1965			if (avail) {
1966				if (avail > budget)
1967					avail = budget;
1968
1969				free_tx_desc(adap, &q->q, avail, true);
1970				q->q.in_use -= avail;
1971				budget -= avail;
1972			}
1973			__netif_tx_unlock(q->txq);
1974		}
1975
1976		if (++i >= s->ethqsets)
1977			i = 0;
1978	} while (budget && i != s->ethtxq_rover);
1979	s->ethtxq_rover = i;
1980	mod_timer(&s->tx_timer, jiffies + (budget ? TX_QCHECK_PERIOD : 2));
1981}
1982
1983int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
1984		     struct net_device *dev, int intr_idx,
1985		     struct sge_fl *fl, rspq_handler_t hnd)
1986{
1987	int ret, flsz = 0;
1988	struct fw_iq_cmd c;
1989	struct port_info *pi = netdev_priv(dev);
1990
1991	/* Size needs to be multiple of 16, including status entry. */
1992	iq->size = roundup(iq->size, 16);
1993
1994	iq->desc = alloc_ring(adap->pdev_dev, iq->size, iq->iqe_len, 0,
1995			      &iq->phys_addr, NULL, 0);
1996	if (!iq->desc)
1997		return -ENOMEM;
1998
1999	memset(&c, 0, sizeof(c));
2000	c.op_to_vfn = htonl(FW_CMD_OP(FW_IQ_CMD) | FW_CMD_REQUEST |
2001			    FW_CMD_WRITE | FW_CMD_EXEC |
2002			    FW_IQ_CMD_PFN(adap->fn) | FW_IQ_CMD_VFN(0));
2003	c.alloc_to_len16 = htonl(FW_IQ_CMD_ALLOC | FW_IQ_CMD_IQSTART(1) |
2004				 FW_LEN16(c));
2005	c.type_to_iqandstindex = htonl(FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
2006		FW_IQ_CMD_IQASYNCH(fwevtq) | FW_IQ_CMD_VIID(pi->viid) |
2007		FW_IQ_CMD_IQANDST(intr_idx < 0) | FW_IQ_CMD_IQANUD(1) |
2008		FW_IQ_CMD_IQANDSTINDEX(intr_idx >= 0 ? intr_idx :
2009							-intr_idx - 1));
2010	c.iqdroprss_to_iqesize = htons(FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
2011		FW_IQ_CMD_IQGTSMODE |
2012		FW_IQ_CMD_IQINTCNTTHRESH(iq->pktcnt_idx) |
2013		FW_IQ_CMD_IQESIZE(ilog2(iq->iqe_len) - 4));
2014	c.iqsize = htons(iq->size);
2015	c.iqaddr = cpu_to_be64(iq->phys_addr);
2016
2017	if (fl) {
2018		fl->size = roundup(fl->size, 8);
2019		fl->desc = alloc_ring(adap->pdev_dev, fl->size, sizeof(__be64),
2020				      sizeof(struct rx_sw_desc), &fl->addr,
2021				      &fl->sdesc, STAT_LEN);
2022		if (!fl->desc)
2023			goto fl_nomem;
2024
2025		flsz = fl->size / 8 + STAT_LEN / sizeof(struct tx_desc);
2026		c.iqns_to_fl0congen = htonl(FW_IQ_CMD_FL0PACKEN |
2027					    FW_IQ_CMD_FL0PADEN);
2028		c.fl0dcaen_to_fl0cidxfthresh = htons(FW_IQ_CMD_FL0FBMIN(2) |
2029				FW_IQ_CMD_FL0FBMAX(3));
2030		c.fl0size = htons(flsz);
2031		c.fl0addr = cpu_to_be64(fl->addr);
2032	}
2033
2034	ret = t4_wr_mbox(adap, adap->fn, &c, sizeof(c), &c);
2035	if (ret)
2036		goto err;
2037
2038	netif_napi_add(dev, &iq->napi, napi_rx_handler, 64);
2039	iq->cur_desc = iq->desc;
2040	iq->cidx = 0;
2041	iq->gen = 1;
2042	iq->next_intr_params = iq->intr_params;
2043	iq->cntxt_id = ntohs(c.iqid);
2044	iq->abs_id = ntohs(c.physiqid);
2045	iq->size--;                           /* subtract status entry */
2046	iq->adap = adap;
2047	iq->netdev = dev;
2048	iq->handler = hnd;
2049
2050	/* set offset to -1 to distinguish ingress queues without FL */
2051	iq->offset = fl ? 0 : -1;
2052
2053	adap->sge.ingr_map[iq->cntxt_id] = iq;
2054
2055	if (fl) {
2056		fl->cntxt_id = ntohs(c.fl0id);
2057		fl->avail = fl->pend_cred = 0;
2058		fl->pidx = fl->cidx = 0;
2059		fl->alloc_failed = fl->large_alloc_failed = fl->starving = 0;
2060		adap->sge.egr_map[fl->cntxt_id] = fl;
2061		refill_fl(adap, fl, fl_cap(fl), GFP_KERNEL);
2062	}
2063	return 0;
2064
2065fl_nomem:
2066	ret = -ENOMEM;
2067err:
2068	if (iq->desc) {
2069		dma_free_coherent(adap->pdev_dev, iq->size * iq->iqe_len,
2070				  iq->desc, iq->phys_addr);
2071		iq->desc = NULL;
2072	}
2073	if (fl && fl->desc) {
2074		kfree(fl->sdesc);
2075		fl->sdesc = NULL;
2076		dma_free_coherent(adap->pdev_dev, flsz * sizeof(struct tx_desc),
2077				  fl->desc, fl->addr);
2078		fl->desc = NULL;
2079	}
2080	return ret;
2081}
2082
2083static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id)
2084{
2085	q->in_use = 0;
2086	q->cidx = q->pidx = 0;
2087	q->stops = q->restarts = 0;
2088	q->stat = (void *)&q->desc[q->size];
2089	q->cntxt_id = id;
2090	adap->sge.egr_map[id] = q;
2091}
2092
2093int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
2094			 struct net_device *dev, struct netdev_queue *netdevq,
2095			 unsigned int iqid)
2096{
2097	int ret, nentries;
2098	struct fw_eq_eth_cmd c;
2099	struct port_info *pi = netdev_priv(dev);
2100
2101	/* Add status entries */
2102	nentries = txq->q.size + STAT_LEN / sizeof(struct tx_desc);
2103
2104	txq->q.desc = alloc_ring(adap->pdev_dev, txq->q.size,
2105			sizeof(struct tx_desc), sizeof(struct tx_sw_desc),
2106			&txq->q.phys_addr, &txq->q.sdesc, STAT_LEN);
2107	if (!txq->q.desc)
2108		return -ENOMEM;
2109
2110	memset(&c, 0, sizeof(c));
2111	c.op_to_vfn = htonl(FW_CMD_OP(FW_EQ_ETH_CMD) | FW_CMD_REQUEST |
2112			    FW_CMD_WRITE | FW_CMD_EXEC |
2113			    FW_EQ_ETH_CMD_PFN(adap->fn) | FW_EQ_ETH_CMD_VFN(0));
2114	c.alloc_to_len16 = htonl(FW_EQ_ETH_CMD_ALLOC |
2115				 FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c));
2116	c.viid_pkd = htonl(FW_EQ_ETH_CMD_VIID(pi->viid));
2117	c.fetchszm_to_iqid = htonl(FW_EQ_ETH_CMD_HOSTFCMODE(2) |
2118				   FW_EQ_ETH_CMD_PCIECHN(pi->tx_chan) |
2119				   FW_EQ_ETH_CMD_IQID(iqid));
2120	c.dcaen_to_eqsize = htonl(FW_EQ_ETH_CMD_FBMIN(2) |
2121				  FW_EQ_ETH_CMD_FBMAX(3) |
2122				  FW_EQ_ETH_CMD_CIDXFTHRESH(5) |
2123				  FW_EQ_ETH_CMD_EQSIZE(nentries));
2124	c.eqaddr = cpu_to_be64(txq->q.phys_addr);
2125
2126	ret = t4_wr_mbox(adap, adap->fn, &c, sizeof(c), &c);
2127	if (ret) {
2128		kfree(txq->q.sdesc);
2129		txq->q.sdesc = NULL;
2130		dma_free_coherent(adap->pdev_dev,
2131				  nentries * sizeof(struct tx_desc),
2132				  txq->q.desc, txq->q.phys_addr);
2133		txq->q.desc = NULL;
2134		return ret;
2135	}
2136
2137	init_txq(adap, &txq->q, FW_EQ_ETH_CMD_EQID_GET(ntohl(c.eqid_pkd)));
2138	txq->txq = netdevq;
2139	txq->tso = txq->tx_cso = txq->vlan_ins = 0;
2140	txq->mapping_err = 0;
2141	return 0;
2142}
2143
2144int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
2145			  struct net_device *dev, unsigned int iqid,
2146			  unsigned int cmplqid)
2147{
2148	int ret, nentries;
2149	struct fw_eq_ctrl_cmd c;
2150	struct port_info *pi = netdev_priv(dev);
2151
2152	/* Add status entries */
2153	nentries = txq->q.size + STAT_LEN / sizeof(struct tx_desc);
2154
2155	txq->q.desc = alloc_ring(adap->pdev_dev, nentries,
2156				 sizeof(struct tx_desc), 0, &txq->q.phys_addr,
2157				 NULL, 0);
2158	if (!txq->q.desc)
2159		return -ENOMEM;
2160
2161	c.op_to_vfn = htonl(FW_CMD_OP(FW_EQ_CTRL_CMD) | FW_CMD_REQUEST |
2162			    FW_CMD_WRITE | FW_CMD_EXEC |
2163			    FW_EQ_CTRL_CMD_PFN(adap->fn) |
2164			    FW_EQ_CTRL_CMD_VFN(0));
2165	c.alloc_to_len16 = htonl(FW_EQ_CTRL_CMD_ALLOC |
2166				 FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c));
2167	c.cmpliqid_eqid = htonl(FW_EQ_CTRL_CMD_CMPLIQID(cmplqid));
2168	c.physeqid_pkd = htonl(0);
2169	c.fetchszm_to_iqid = htonl(FW_EQ_CTRL_CMD_HOSTFCMODE(2) |
2170				   FW_EQ_CTRL_CMD_PCIECHN(pi->tx_chan) |
2171				   FW_EQ_CTRL_CMD_IQID(iqid));
2172	c.dcaen_to_eqsize = htonl(FW_EQ_CTRL_CMD_FBMIN(2) |
2173				  FW_EQ_CTRL_CMD_FBMAX(3) |
2174				  FW_EQ_CTRL_CMD_CIDXFTHRESH(5) |
2175				  FW_EQ_CTRL_CMD_EQSIZE(nentries));
2176	c.eqaddr = cpu_to_be64(txq->q.phys_addr);
2177
2178	ret = t4_wr_mbox(adap, adap->fn, &c, sizeof(c), &c);
2179	if (ret) {
2180		dma_free_coherent(adap->pdev_dev,
2181				  nentries * sizeof(struct tx_desc),
2182				  txq->q.desc, txq->q.phys_addr);
2183		txq->q.desc = NULL;
2184		return ret;
2185	}
2186
2187	init_txq(adap, &txq->q, FW_EQ_CTRL_CMD_EQID_GET(ntohl(c.cmpliqid_eqid)));
2188	txq->adap = adap;
2189	skb_queue_head_init(&txq->sendq);
2190	tasklet_init(&txq->qresume_tsk, restart_ctrlq, (unsigned long)txq);
2191	txq->full = 0;
2192	return 0;
2193}
2194
2195int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq,
2196			  struct net_device *dev, unsigned int iqid)
2197{
2198	int ret, nentries;
2199	struct fw_eq_ofld_cmd c;
2200	struct port_info *pi = netdev_priv(dev);
2201
2202	/* Add status entries */
2203	nentries = txq->q.size + STAT_LEN / sizeof(struct tx_desc);
2204
2205	txq->q.desc = alloc_ring(adap->pdev_dev, txq->q.size,
2206			sizeof(struct tx_desc), sizeof(struct tx_sw_desc),
2207			&txq->q.phys_addr, &txq->q.sdesc, STAT_LEN);
2208	if (!txq->q.desc)
2209		return -ENOMEM;
2210
2211	memset(&c, 0, sizeof(c));
2212	c.op_to_vfn = htonl(FW_CMD_OP(FW_EQ_OFLD_CMD) | FW_CMD_REQUEST |
2213			    FW_CMD_WRITE | FW_CMD_EXEC |
2214			    FW_EQ_OFLD_CMD_PFN(adap->fn) |
2215			    FW_EQ_OFLD_CMD_VFN(0));
2216	c.alloc_to_len16 = htonl(FW_EQ_OFLD_CMD_ALLOC |
2217				 FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c));
2218	c.fetchszm_to_iqid = htonl(FW_EQ_OFLD_CMD_HOSTFCMODE(2) |
2219				   FW_EQ_OFLD_CMD_PCIECHN(pi->tx_chan) |
2220				   FW_EQ_OFLD_CMD_IQID(iqid));
2221	c.dcaen_to_eqsize = htonl(FW_EQ_OFLD_CMD_FBMIN(2) |
2222				  FW_EQ_OFLD_CMD_FBMAX(3) |
2223				  FW_EQ_OFLD_CMD_CIDXFTHRESH(5) |
2224				  FW_EQ_OFLD_CMD_EQSIZE(nentries));
2225	c.eqaddr = cpu_to_be64(txq->q.phys_addr);
2226
2227	ret = t4_wr_mbox(adap, adap->fn, &c, sizeof(c), &c);
2228	if (ret) {
2229		kfree(txq->q.sdesc);
2230		txq->q.sdesc = NULL;
2231		dma_free_coherent(adap->pdev_dev,
2232				  nentries * sizeof(struct tx_desc),
2233				  txq->q.desc, txq->q.phys_addr);
2234		txq->q.desc = NULL;
2235		return ret;
2236	}
2237
2238	init_txq(adap, &txq->q, FW_EQ_OFLD_CMD_EQID_GET(ntohl(c.eqid_pkd)));
2239	txq->adap = adap;
2240	skb_queue_head_init(&txq->sendq);
2241	tasklet_init(&txq->qresume_tsk, restart_ofldq, (unsigned long)txq);
2242	txq->full = 0;
2243	txq->mapping_err = 0;
2244	return 0;
2245}
2246
2247static void free_txq(struct adapter *adap, struct sge_txq *q)
2248{
2249	dma_free_coherent(adap->pdev_dev,
2250			  q->size * sizeof(struct tx_desc) + STAT_LEN,
2251			  q->desc, q->phys_addr);
2252	q->cntxt_id = 0;
2253	q->sdesc = NULL;
2254	q->desc = NULL;
2255}
2256
2257static void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq,
2258			 struct sge_fl *fl)
2259{
2260	unsigned int fl_id = fl ? fl->cntxt_id : 0xffff;
2261
2262	adap->sge.ingr_map[rq->cntxt_id] = NULL;
2263	t4_iq_free(adap, adap->fn, adap->fn, 0, FW_IQ_TYPE_FL_INT_CAP,
2264		   rq->cntxt_id, fl_id, 0xffff);
2265	dma_free_coherent(adap->pdev_dev, (rq->size + 1) * rq->iqe_len,
2266			  rq->desc, rq->phys_addr);
2267	netif_napi_del(&rq->napi);
2268	rq->netdev = NULL;
2269	rq->cntxt_id = rq->abs_id = 0;
2270	rq->desc = NULL;
2271
2272	if (fl) {
2273		free_rx_bufs(adap, fl, fl->avail);
2274		dma_free_coherent(adap->pdev_dev, fl->size * 8 + STAT_LEN,
2275				  fl->desc, fl->addr);
2276		kfree(fl->sdesc);
2277		fl->sdesc = NULL;
2278		fl->cntxt_id = 0;
2279		fl->desc = NULL;
2280	}
2281}
2282
2283/**
2284 *	t4_free_sge_resources - free SGE resources
2285 *	@adap: the adapter
2286 *
2287 *	Frees resources used by the SGE queue sets.
2288 */
2289void t4_free_sge_resources(struct adapter *adap)
2290{
2291	int i;
2292	struct sge_eth_rxq *eq = adap->sge.ethrxq;
2293	struct sge_eth_txq *etq = adap->sge.ethtxq;
2294	struct sge_ofld_rxq *oq = adap->sge.ofldrxq;
2295
2296	/* clean up Ethernet Tx/Rx queues */
2297	for (i = 0; i < adap->sge.ethqsets; i++, eq++, etq++) {
2298		if (eq->rspq.desc)
2299			free_rspq_fl(adap, &eq->rspq, &eq->fl);
2300		if (etq->q.desc) {
2301			t4_eth_eq_free(adap, adap->fn, adap->fn, 0,
2302				       etq->q.cntxt_id);
2303			free_tx_desc(adap, &etq->q, etq->q.in_use, true);
2304			kfree(etq->q.sdesc);
2305			free_txq(adap, &etq->q);
2306		}
2307	}
2308
2309	/* clean up RDMA and iSCSI Rx queues */
2310	for (i = 0; i < adap->sge.ofldqsets; i++, oq++) {
2311		if (oq->rspq.desc)
2312			free_rspq_fl(adap, &oq->rspq, &oq->fl);
2313	}
2314	for (i = 0, oq = adap->sge.rdmarxq; i < adap->sge.rdmaqs; i++, oq++) {
2315		if (oq->rspq.desc)
2316			free_rspq_fl(adap, &oq->rspq, &oq->fl);
2317	}
2318
2319	/* clean up offload Tx queues */
2320	for (i = 0; i < ARRAY_SIZE(adap->sge.ofldtxq); i++) {
2321		struct sge_ofld_txq *q = &adap->sge.ofldtxq[i];
2322
2323		if (q->q.desc) {
2324			tasklet_kill(&q->qresume_tsk);
2325			t4_ofld_eq_free(adap, adap->fn, adap->fn, 0,
2326					q->q.cntxt_id);
2327			free_tx_desc(adap, &q->q, q->q.in_use, false);
2328			kfree(q->q.sdesc);
2329			__skb_queue_purge(&q->sendq);
2330			free_txq(adap, &q->q);
2331		}
2332	}
2333
2334	/* clean up control Tx queues */
2335	for (i = 0; i < ARRAY_SIZE(adap->sge.ctrlq); i++) {
2336		struct sge_ctrl_txq *cq = &adap->sge.ctrlq[i];
2337
2338		if (cq->q.desc) {
2339			tasklet_kill(&cq->qresume_tsk);
2340			t4_ctrl_eq_free(adap, adap->fn, adap->fn, 0,
2341					cq->q.cntxt_id);
2342			__skb_queue_purge(&cq->sendq);
2343			free_txq(adap, &cq->q);
2344		}
2345	}
2346
2347	if (adap->sge.fw_evtq.desc)
2348		free_rspq_fl(adap, &adap->sge.fw_evtq, NULL);
2349
2350	if (adap->sge.intrq.desc)
2351		free_rspq_fl(adap, &adap->sge.intrq, NULL);
2352
2353	/* clear the reverse egress queue map */
2354	memset(adap->sge.egr_map, 0, sizeof(adap->sge.egr_map));
2355}
2356
2357void t4_sge_start(struct adapter *adap)
2358{
2359	adap->sge.ethtxq_rover = 0;
2360	mod_timer(&adap->sge.rx_timer, jiffies + RX_QCHECK_PERIOD);
2361	mod_timer(&adap->sge.tx_timer, jiffies + TX_QCHECK_PERIOD);
2362}
2363
2364/**
2365 *	t4_sge_stop - disable SGE operation
2366 *	@adap: the adapter
2367 *
2368 *	Stop tasklets and timers associated with the DMA engine.  Note that
2369 *	this is effective only if measures have been taken to disable any HW
2370 *	events that may restart them.
2371 */
2372void t4_sge_stop(struct adapter *adap)
2373{
2374	int i;
2375	struct sge *s = &adap->sge;
2376
2377	if (in_interrupt())  /* actions below require waiting */
2378		return;
2379
2380	if (s->rx_timer.function)
2381		del_timer_sync(&s->rx_timer);
2382	if (s->tx_timer.function)
2383		del_timer_sync(&s->tx_timer);
2384
2385	for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++) {
2386		struct sge_ofld_txq *q = &s->ofldtxq[i];
2387
2388		if (q->q.desc)
2389			tasklet_kill(&q->qresume_tsk);
2390	}
2391	for (i = 0; i < ARRAY_SIZE(s->ctrlq); i++) {
2392		struct sge_ctrl_txq *cq = &s->ctrlq[i];
2393
2394		if (cq->q.desc)
2395			tasklet_kill(&cq->qresume_tsk);
2396	}
2397}
2398
2399/**
2400 *	t4_sge_init - initialize SGE
2401 *	@adap: the adapter
2402 *
2403 *	Performs SGE initialization needed every time after a chip reset.
2404 *	We do not initialize any of the queues here, instead the driver
2405 *	top-level must request them individually.
2406 */
2407void t4_sge_init(struct adapter *adap)
2408{
2409	unsigned int i, v;
2410	struct sge *s = &adap->sge;
2411	unsigned int fl_align_log = ilog2(FL_ALIGN);
2412
2413	t4_set_reg_field(adap, SGE_CONTROL, PKTSHIFT_MASK |
2414			 INGPADBOUNDARY_MASK | EGRSTATUSPAGESIZE,
2415			 INGPADBOUNDARY(fl_align_log - 5) | PKTSHIFT(2) |
2416			 RXPKTCPLMODE |
2417			 (STAT_LEN == 128 ? EGRSTATUSPAGESIZE : 0));
2418
2419	for (i = v = 0; i < 32; i += 4)
2420		v |= (PAGE_SHIFT - 10) << i;
2421	t4_write_reg(adap, SGE_HOST_PAGE_SIZE, v);
2422	t4_write_reg(adap, SGE_FL_BUFFER_SIZE0, PAGE_SIZE);
2423#if FL_PG_ORDER > 0
2424	t4_write_reg(adap, SGE_FL_BUFFER_SIZE1, PAGE_SIZE << FL_PG_ORDER);
2425#endif
2426	t4_write_reg(adap, SGE_INGRESS_RX_THRESHOLD,
2427		     THRESHOLD_0(s->counter_val[0]) |
2428		     THRESHOLD_1(s->counter_val[1]) |
2429		     THRESHOLD_2(s->counter_val[2]) |
2430		     THRESHOLD_3(s->counter_val[3]));
2431	t4_write_reg(adap, SGE_TIMER_VALUE_0_AND_1,
2432		     TIMERVALUE0(us_to_core_ticks(adap, s->timer_val[0])) |
2433		     TIMERVALUE1(us_to_core_ticks(adap, s->timer_val[1])));
2434	t4_write_reg(adap, SGE_TIMER_VALUE_2_AND_3,
2435		     TIMERVALUE0(us_to_core_ticks(adap, s->timer_val[2])) |
2436		     TIMERVALUE1(us_to_core_ticks(adap, s->timer_val[3])));
2437	t4_write_reg(adap, SGE_TIMER_VALUE_4_AND_5,
2438		     TIMERVALUE0(us_to_core_ticks(adap, s->timer_val[4])) |
2439		     TIMERVALUE1(us_to_core_ticks(adap, s->timer_val[5])));
2440	setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adap);
2441	setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adap);
2442	s->starve_thres = core_ticks_per_usec(adap) * 1000000;  /* 1 s */
2443	s->idma_state[0] = s->idma_state[1] = 0;
2444	spin_lock_init(&s->intrq_lock);
2445}
2446