1/**************************************************************************
2
3Copyright (c) 2007-2009, Chelsio Inc.
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10    this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13    contributors may be used to endorse or promote products derived from
14    this software without specific prior written permission.
15
16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26POSSIBILITY OF SUCH DAMAGE.
27
28***************************************************************************/
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_sge.c 194259 2009-06-15 19:50:03Z sam $");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/kernel.h>
36#include <sys/module.h>
37#include <sys/bus.h>
38#include <sys/conf.h>
39#include <machine/bus.h>
40#include <machine/resource.h>
41#include <sys/bus_dma.h>
42#include <sys/rman.h>
43#include <sys/queue.h>
44#include <sys/sysctl.h>
45#include <sys/taskqueue.h>
46
47#include <sys/proc.h>
48#include <sys/sbuf.h>
49#include <sys/sched.h>
50#include <sys/smp.h>
51#include <sys/systm.h>
52#include <sys/syslog.h>
53
54#include <netinet/in_systm.h>
55#include <netinet/in.h>
56#include <netinet/ip.h>
57#include <netinet/tcp.h>
58
59#include <dev/pci/pcireg.h>
60#include <dev/pci/pcivar.h>
61
62#include <vm/vm.h>
63#include <vm/pmap.h>
64
65#include <cxgb_include.h>
66#include <sys/mvec.h>
67
68int      txq_fills = 0;
69/*
70 * XXX don't re-enable this until TOE stops assuming
71 * we have an m_ext
72 */
73static int recycle_enable = 0;
74extern int cxgb_txq_buf_ring_size;
75int cxgb_cached_allocations;
76int cxgb_cached;
77int cxgb_ext_freed = 0;
78int cxgb_ext_inited = 0;
79int fl_q_size = 0;
80int jumbo_q_size = 0;
81
82extern int cxgb_use_16k_clusters;
83extern int cxgb_pcpu_cache_enable;
84extern int nmbjumbo4;
85extern int nmbjumbo9;
86extern int nmbjumbo16;
87extern int multiq_tx_enable;
88extern int coalesce_tx_enable;
89extern int wakeup_tx_thread;
90
91#define USE_GTS 0
92
93#define SGE_RX_SM_BUF_SIZE	1536
94#define SGE_RX_DROP_THRES	16
95#define SGE_RX_COPY_THRES	128
96
97/*
98 * Period of the Tx buffer reclaim timer.  This timer does not need to run
99 * frequently as Tx buffers are usually reclaimed by new Tx packets.
100 */
101#define TX_RECLAIM_PERIOD       (hz >> 1)
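/*
 * Illustrative arithmetic: hz is the kernel tick rate, so with the common
 * default of hz = 1000 the expression above evaluates to 500 ticks, i.e.
 * the reclaim timer fires roughly every half second.
 */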
102
103/*
104 * Values for sge_txq.flags
105 */
106enum {
107	TXQ_RUNNING	= 1 << 0,  /* fetch engine is running */
108	TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
109};
110
111struct tx_desc {
112	uint64_t	flit[TX_DESC_FLITS];
113} __packed;
114
115struct rx_desc {
116	uint32_t	addr_lo;
117	uint32_t	len_gen;
118	uint32_t	gen2;
119	uint32_t	addr_hi;
120} __packed;
121
122struct rsp_desc {               /* response queue descriptor */
123	struct rss_header	rss_hdr;
124	uint32_t		flags;
125	uint32_t		len_cq;
126	uint8_t			imm_data[47];
127	uint8_t			intr_gen;
128} __packed;
129
130#define RX_SW_DESC_MAP_CREATED	(1 << 0)
131#define TX_SW_DESC_MAP_CREATED	(1 << 1)
132#define RX_SW_DESC_INUSE        (1 << 3)
133#define TX_SW_DESC_MAPPED       (1 << 4)
134
135#define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
136#define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
137#define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
138#define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
139
140struct tx_sw_desc {                /* SW state per Tx descriptor */
141	struct mbuf_iovec mi;
142	bus_dmamap_t	map;
143	int		flags;
144};
145
146struct rx_sw_desc {                /* SW state per Rx descriptor */
147	caddr_t	         rxsd_cl;
148	caddr_t	         data;
149	bus_dmamap_t	  map;
150	int		  flags;
151};
152
153struct txq_state {
154	unsigned int compl;
155	unsigned int gen;
156	unsigned int pidx;
157};
158
159struct refill_fl_cb_arg {
160	int               error;
161	bus_dma_segment_t seg;
162	int               nseg;
163};
164
165/*
166 * Maps a number of flits to the number of Tx descriptors that can hold them.
167 * The formula is
168 *
169 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
170 *
171 * HW allows up to 4 descriptors to be combined into a WR.
172 */
173static uint8_t flit_desc_map[] = {
174	0,
175#if SGE_NUM_GENBITS == 1
176	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
177	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
178	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
179	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
180#elif SGE_NUM_GENBITS == 2
181	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
182	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
183	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
184	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
185#else
186# error "SGE_NUM_GENBITS must be 1 or 2"
187#endif
188};
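/*
 * Worked example of the formula above for the SGE_NUM_GENBITS == 2 table:
 * there each descriptor carries 15 usable flits (wr_gen2() reserves the
 * last flit of a descriptor for the second generation bit), so a 16-flit
 * WR needs 1 + (16 - 2) / (15 - 1) = 2 descriptors, which matches
 * flit_desc_map[16] == 2.
 */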
189
190
191int cxgb_debug = 0;
192
193static void sge_timer_cb(void *arg);
194static void sge_timer_reclaim(void *arg, int ncount);
195static void sge_txq_reclaim_handler(void *arg, int ncount);
196
197/**
198 *	reclaim_completed_tx - reclaims completed Tx descriptors
199 *	@q: the Tx queue to reclaim completed descriptors from
200 *	@reclaim_min: don't reclaim unless at least this many descriptors are reclaimable
201 *
202 *	Reclaims Tx descriptors that the SGE has indicated it has processed,
203 *	and frees the associated buffers if possible.  Called with the Tx
204 *	queue's lock held.
205 */
206static __inline int
207reclaim_completed_tx_(struct sge_txq *q, int reclaim_min)
208{
209	int reclaim = desc_reclaimable(q);
210
211	if (reclaim < reclaim_min)
212		return (0);
213
214	mtx_assert(&q->lock, MA_OWNED);
215	if (reclaim > 0) {
216		t3_free_tx_desc(q, reclaim);
217		q->cleaned += reclaim;
218		q->in_use -= reclaim;
219	}
220	return (reclaim);
221}
222
223/**
224 *	should_restart_tx - are there enough resources to restart a Tx queue?
225 *	@q: the Tx queue
226 *
227 *	Checks if there are enough descriptors to restart a suspended Tx queue.
228 */
229static __inline int
230should_restart_tx(const struct sge_txq *q)
231{
232	unsigned int r = q->processed - q->cleaned;
233
234	return q->in_use - r < (q->size >> 1);
235}
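/*
 * Example with a 1024-entry ring: if 900 descriptors are in use and 500
 * have been processed by the SGE but not yet cleaned, 900 - 500 = 400 is
 * below half the ring size (512), so a suspended queue may be restarted.
 */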
236
237/**
238 *	t3_sge_init - initialize SGE
239 *	@adap: the adapter
240 *	@p: the SGE parameters
241 *
242 *	Performs SGE initialization needed every time after a chip reset.
243 *	We do not initialize any of the queue sets here, instead the driver
244 *	top-level must request those individually.  We also do not enable DMA
245 *	here, that should be done after the queues have been set up.
246 */
247void
248t3_sge_init(adapter_t *adap, struct sge_params *p)
249{
250	u_int ctrl, ups;
251
252	ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
253
254	ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
255	       F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
256	       V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
257	       V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
258#if SGE_NUM_GENBITS == 1
259	ctrl |= F_EGRGENCTRL;
260#endif
261	if (adap->params.rev > 0) {
262		if (!(adap->flags & (USING_MSIX | USING_MSI)))
263			ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
264	}
265	t3_write_reg(adap, A_SG_CONTROL, ctrl);
266	t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
267		     V_LORCQDRBTHRSH(512));
268	t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
269	t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
270		     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
271	t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
272		     adap->params.rev < T3_REV_C ? 1000 : 500);
273	t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
274	t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
275	t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
276	t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
277	t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
278}
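/*
 * Note on the SG_CONTROL setup above: V_HOSTPAGESIZE(PAGE_SHIFT - 11)
 * encodes the host page size as log2(page size) - 11, e.g. with 4KB pages
 * (PAGE_SHIFT == 12) the field is written as 1.
 */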
279
280
281/**
282 *	sgl_len - calculates the size of an SGL of the given capacity
283 *	@n: the number of SGL entries
284 *
285 *	Calculates the number of flits needed for a scatter/gather list that
286 *	can hold the given number of entries.
287 */
288static __inline unsigned int
289sgl_len(unsigned int n)
290{
291	return ((3 * n) / 2 + (n & 1));
292}
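/*
 * Illustrative arithmetic: two SGL entries (two address/length pairs) pack
 * into 3 flits, so e.g. sgl_len(3) = (3 * 3) / 2 + 1 = 5 flits and
 * sgl_len(4) = 6 flits.
 */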
293
294/**
295 *	get_imm_packet - return the next ingress packet buffer from a response
296 *	@resp: the response descriptor containing the packet data
297 *
298 *	Return a packet containing the immediate data of the given response.
299 */
300static int
301get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
302{
303
304	m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
305	m->m_ext.ext_buf = NULL;
306	m->m_ext.ext_type = 0;
307	memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
308	return (0);
309}
310
311static __inline u_int
312flits_to_desc(u_int n)
313{
314	return (flit_desc_map[n]);
315}
316
317#define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
318		    F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
319		    V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
320		    F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
321		    F_HIRCQPARITYERROR)
322#define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
323#define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
324		      F_RSPQDISABLED)
325
326/**
327 *	t3_sge_err_intr_handler - SGE async event interrupt handler
328 *	@adapter: the adapter
329 *
330 *	Interrupt handler for SGE asynchronous (non-data) events.
331 */
332void
333t3_sge_err_intr_handler(adapter_t *adapter)
334{
335	unsigned int v, status;
336
337	status = t3_read_reg(adapter, A_SG_INT_CAUSE);
338	if (status & SGE_PARERR)
339		CH_ALERT(adapter, "SGE parity error (0x%x)\n",
340			 status & SGE_PARERR);
341	if (status & SGE_FRAMINGERR)
342		CH_ALERT(adapter, "SGE framing error (0x%x)\n",
343			 status & SGE_FRAMINGERR);
344	if (status & F_RSPQCREDITOVERFOW)
345		CH_ALERT(adapter, "SGE response queue credit overflow\n");
346
347	if (status & F_RSPQDISABLED) {
348		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
349
350		CH_ALERT(adapter,
351			 "packet delivered to disabled response queue (0x%x)\n",
352			 (v >> S_RSPQ0DISABLED) & 0xff);
353	}
354
355	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
356	if (status & SGE_FATALERR)
357		t3_fatal_err(adapter);
358}
359
360void
361t3_sge_prep(adapter_t *adap, struct sge_params *p)
362{
363	int i, nqsets;
364
365	nqsets = min(SGE_QSETS, mp_ncpus*4);
366
367	fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
368
369	while (!powerof2(fl_q_size))
370		fl_q_size--;
371#if __FreeBSD_version >= 700111
372	if (cxgb_use_16k_clusters)
373		jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
374	else
375		jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
376#else
377	jumbo_q_size = min(nmbjumbo4/(3*nqsets), JUMBO_Q_SIZE);
378#endif
379	while (!powerof2(jumbo_q_size))
380		jumbo_q_size--;
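	/*
	 * Illustrative sizing (hypothetical numbers): with nmbclusters = 25600
	 * and 8 queue sets, nmbclusters / (3 * nqsets) is 1066, which the
	 * loops above round down to the nearest power of two, 1024 (the
	 * FL_Q_SIZE/JUMBO_Q_SIZE caps are applied first by the min()s).
	 */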
381
382	/* XXX Does ETHER_ALIGN need to be accounted for here? */
383	p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data);
384
385	for (i = 0; i < SGE_QSETS; ++i) {
386		struct qset_params *q = p->qset + i;
387
388		if (adap->params.nports > 2) {
389			q->coalesce_usecs = 50;
390		} else {
391#ifdef INVARIANTS
392			q->coalesce_usecs = 10;
393#else
394			q->coalesce_usecs = 5;
395#endif
396		}
397		q->polling = 0;
398		q->rspq_size = RSPQ_Q_SIZE;
399		q->fl_size = fl_q_size;
400		q->jumbo_size = jumbo_q_size;
401		q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
402		q->txq_size[TXQ_OFLD] = 1024;
403		q->txq_size[TXQ_CTRL] = 256;
404		q->cong_thres = 0;
405	}
406}
407
408int
409t3_sge_alloc(adapter_t *sc)
410{
411
412	/* The parent tag. */
413	if (bus_dma_tag_create( NULL,			/* parent */
414				1, 0,			/* algnmnt, boundary */
415				BUS_SPACE_MAXADDR,	/* lowaddr */
416				BUS_SPACE_MAXADDR,	/* highaddr */
417				NULL, NULL,		/* filter, filterarg */
418				BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
419				BUS_SPACE_UNRESTRICTED, /* nsegments */
420				BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
421				0,			/* flags */
422				NULL, NULL,		/* lock, lockarg */
423				&sc->parent_dmat)) {
424		device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
425		return (ENOMEM);
426	}
427
428	/*
429	 * DMA tag for normal sized RX frames
430	 */
431	if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
432		BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
433		MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
434		device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
435		return (ENOMEM);
436	}
437
438	/*
439	 * DMA tag for jumbo sized RX frames.
440	 */
441	if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
442		BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
443		BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
444		device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
445		return (ENOMEM);
446	}
447
448	/*
449	 * DMA tag for TX frames.
450	 */
451	if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
452		BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
453		TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
454		NULL, NULL, &sc->tx_dmat)) {
455		device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
456		return (ENOMEM);
457	}
458
459	return (0);
460}
461
462int
463t3_sge_free(struct adapter * sc)
464{
465
466	if (sc->tx_dmat != NULL)
467		bus_dma_tag_destroy(sc->tx_dmat);
468
469	if (sc->rx_jumbo_dmat != NULL)
470		bus_dma_tag_destroy(sc->rx_jumbo_dmat);
471
472	if (sc->rx_dmat != NULL)
473		bus_dma_tag_destroy(sc->rx_dmat);
474
475	if (sc->parent_dmat != NULL)
476		bus_dma_tag_destroy(sc->parent_dmat);
477
478	return (0);
479}
480
481void
482t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
483{
484
485	qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
486	qs->rspq.polling = 0 /* p->polling */;
487}
488
489#if !defined(__i386__) && !defined(__amd64__)
490static void
491refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
492{
493	struct refill_fl_cb_arg *cb_arg = arg;
494
495	cb_arg->error = error;
496	cb_arg->seg = segs[0];
497	cb_arg->nseg = nseg;
498
499}
500#endif
501/**
502 *	refill_fl - refill an SGE free-buffer list
503 *	@sc: the controller softc
504 *	@q: the free-list to refill
505 *	@n: the number of new buffers to allocate
506 *
507 *	(Re)populate an SGE free-buffer list with up to @n new packet buffers.
508 *	The caller must assure that @n does not exceed the queue's capacity.
509 */
510static void
511refill_fl(adapter_t *sc, struct sge_fl *q, int n)
512{
513	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
514	struct rx_desc *d = &q->desc[q->pidx];
515	struct refill_fl_cb_arg cb_arg;
516	caddr_t cl;
517	int err, count = 0;
518	int header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
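	/*
	 * Editorial note: the cluster is handed to the hardware starting
	 * header_size bytes in (sd->data = cl + header_size below); the bytes
	 * reserved at the front of the cluster appear to be for an mbuf
	 * header plus a reference-count word to be built in place when the
	 * packet is passed up the stack.
	 */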
519
520	cb_arg.error = 0;
521	while (n--) {
522		/*
523		 * We only allocate a cluster, mbuf allocation happens after rx
524		 */
525		if ((cl = cxgb_cache_get(q->zone)) == NULL) {
526			log(LOG_WARNING, "Failed to allocate cluster\n");
527			goto done;
528		}
529
530		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
531			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
532				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
533				uma_zfree(q->zone, cl);
534				goto done;
535			}
536			sd->flags |= RX_SW_DESC_MAP_CREATED;
537		}
538#if !defined(__i386__) && !defined(__amd64__)
539		err = bus_dmamap_load(q->entry_tag, sd->map,
540		    cl + header_size, q->buf_size,
541		    refill_fl_cb, &cb_arg, 0);
542
543		if (err != 0 || cb_arg.error) {
544			log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
545			/*
546			 * XXX free cluster
547			 */
548			return;
549		}
550#else
551		cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)(cl + header_size));
552#endif
553		sd->flags |= RX_SW_DESC_INUSE;
554		sd->rxsd_cl = cl;
555		sd->data = cl + header_size;
556		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
557		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
558		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
559		d->gen2 = htobe32(V_FLD_GEN2(q->gen));
560
561		d++;
562		sd++;
563
564		if (++q->pidx == q->size) {
565			q->pidx = 0;
566			q->gen ^= 1;
567			sd = q->sdesc;
568			d = q->desc;
569		}
570		q->credits++;
571		count++;
572	}
573
574done:
575	if (count)
576		t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
577}
578
579
580/**
581 *	free_rx_bufs - free the Rx buffers on an SGE free list
582 *	@sc: the controller softc
583 *	@q: the SGE free list to clean up
584 *
585 *	Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
586 *	this queue should be stopped before calling this function.
587 */
588static void
589free_rx_bufs(adapter_t *sc, struct sge_fl *q)
590{
591	u_int cidx = q->cidx;
592
593	while (q->credits--) {
594		struct rx_sw_desc *d = &q->sdesc[cidx];
595
596		if (d->flags & RX_SW_DESC_INUSE) {
597			bus_dmamap_unload(q->entry_tag, d->map);
598			bus_dmamap_destroy(q->entry_tag, d->map);
599			uma_zfree(q->zone, d->rxsd_cl);
600		}
601		d->rxsd_cl = NULL;
602		if (++cidx == q->size)
603			cidx = 0;
604	}
605}
606
607static __inline void
608__refill_fl(adapter_t *adap, struct sge_fl *fl)
609{
610	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
611}
612
613static __inline void
614__refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
615{
616	if ((fl->size - fl->credits) < max)
617		refill_fl(adap, fl, min(max, fl->size - fl->credits));
618}
619
620void
621refill_fl_service(adapter_t *adap, struct sge_fl *fl)
622{
623	__refill_fl_lt(adap, fl, 512);
624}
625
626/**
627 *	recycle_rx_buf - recycle a receive buffer
628 *	@adapter: the adapter
629 *	@q: the SGE free list
630 *	@idx: index of buffer to recycle
631 *
632 *	Recycles the specified buffer on the given free list by adding it at
633 *	the next available slot on the list.
634 */
635static void
636recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
637{
638	struct rx_desc *from = &q->desc[idx];
639	struct rx_desc *to   = &q->desc[q->pidx];
640
641	q->sdesc[q->pidx] = q->sdesc[idx];
642	to->addr_lo = from->addr_lo;        // already big endian
643	to->addr_hi = from->addr_hi;        // likewise
644	wmb();
645	to->len_gen = htobe32(V_FLD_GEN1(q->gen));
646	to->gen2 = htobe32(V_FLD_GEN2(q->gen));
647	q->credits++;
648
649	if (++q->pidx == q->size) {
650		q->pidx = 0;
651		q->gen ^= 1;
652	}
653	t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
654}
655
656static void
657alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
658{
659	uint32_t *addr;
660
661	addr = arg;
662	*addr = segs[0].ds_addr;
663}
664
665static int
666alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
667    bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
668    bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
669{
670	size_t len = nelem * elem_size;
671	void *s = NULL;
672	void *p = NULL;
673	int err;
674
675	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
676				      BUS_SPACE_MAXADDR_32BIT,
677				      BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
678				      len, 0, NULL, NULL, tag)) != 0) {
679		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
680		return (ENOMEM);
681	}
682
683	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
684				    map)) != 0) {
685		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
686		return (ENOMEM);
687	}
688
689	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
690	bzero(p, len);
691	*(void **)desc = p;
692
693	if (sw_size) {
694		len = nelem * sw_size;
695		s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
696		*(void **)sdesc = s;
697	}
698	if (parent_entry_tag == NULL)
699		return (0);
700
701	if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
702				      BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
703		                      NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
704				      TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
705		                      NULL, NULL, entry_tag)) != 0) {
706		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
707		return (ENOMEM);
708	}
709	return (0);
710}
711
712static void
713sge_slow_intr_handler(void *arg, int ncount)
714{
715	adapter_t *sc = arg;
716
717	t3_slow_intr_handler(sc);
718}
719
720/**
721 *	sge_timer_cb - perform periodic maintenance of the SGE queues
722 *	@arg: the adapter softc
723 *
724 *	Runs periodically from a timer to perform maintenance of the adapter's
725 *	SGE queue sets.  It performs the following tasks:
726 *
727 *	a) Cleans up any completed Tx descriptors that may still be pending.
728 *	Normal descriptor cleanup happens when new packets are added to a Tx
729 *	queue so this timer is relatively infrequent and does any cleanup only
730 *	if the Tx queue has not seen any new packets in a while.  We make a
731 *	best effort attempt to reclaim descriptors, in that we don't wait
732 *	around if we cannot get a queue's lock (which most likely is because
733 *	someone else is queueing new packets and so will also handle the clean
734 *	up).  Since control queues use immediate data exclusively we don't
735 *	bother cleaning them up here.
736 *
737 *	b) Replenishes Rx queues that have run out due to memory shortage.
738 *	Normally new Rx buffers are added when existing ones are consumed but
739 *	when out of memory a queue can become empty.  We try to add only a few
740 *	buffers here, the queue will be replenished fully as these new buffers
741 *	are used up if memory shortage has subsided.
742 *
743 *	c) Return coalesced response queue credits in case a response queue is
744 *	starved.
745 *
746 *	d) Ring doorbells for T304 tunnel queues since we have seen doorbell
747 *	fifo overflows and the FW doesn't implement any recovery scheme yet.
748 */
749static void
750sge_timer_cb(void *arg)
751{
752	adapter_t *sc = arg;
753#ifndef IFNET_MULTIQUEUE
754	struct port_info *pi;
755	struct sge_qset *qs;
756	struct sge_txq  *txq;
757	int i, j;
758	int reclaim_ofl, refill_rx;
759
760	for (i = 0; i < sc->params.nports; i++) {
761		pi = &sc->port[i];
762		for (j = 0; j < pi->nqsets; j++) {
763			qs = &sc->sge.qs[pi->first_qset + j];
764			txq = &qs->txq[0];
765			reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
766			refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
767			    (qs->fl[1].credits < qs->fl[1].size));
768			if (reclaim_ofl || refill_rx) {
769				taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
770				break;
771			}
772		}
773	}
774#endif
775	if (sc->params.nports > 2) {
776		int i;
777
778		for_each_port(sc, i) {
779			struct port_info *pi = &sc->port[i];
780
781			t3_write_reg(sc, A_SG_KDOORBELL,
782				     F_SELEGRCNTX |
783				     (FW_TUNNEL_SGEEC_START + pi->first_qset));
784		}
785	}
786	if (sc->open_device_map != 0)
787		callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
788}
789
790/*
791 * This is meant to be a catch-all function to keep sge state private
792 * to sge.c
793 *
794 */
795int
796t3_sge_init_adapter(adapter_t *sc)
797{
798	callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
799	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
800	TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
801	mi_init();
802	cxgb_cache_init();
803	return (0);
804}
805
806int
807t3_sge_reset_adapter(adapter_t *sc)
808{
809	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
810	return (0);
811}
812
813int
814t3_sge_init_port(struct port_info *pi)
815{
816	TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
817	return (0);
818}
819
820void
821t3_sge_deinit_sw(adapter_t *sc)
822{
823
824	mi_deinit();
825}
826
827/**
828 *	refill_rspq - replenish an SGE response queue
829 *	@adapter: the adapter
830 *	@q: the response queue to replenish
831 *	@credits: how many new responses to make available
832 *
833 *	Replenishes a response queue by making the supplied number of responses
834 *	available to HW.
835 */
836static __inline void
837refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
838{
839
840	/* mbufs are allocated on demand when a rspq entry is processed. */
841	t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
842		     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
843}
844
845static __inline void
846sge_txq_reclaim_(struct sge_txq *txq, int force)
847{
848
849	if (desc_reclaimable(txq) < 16)
850		return;
851	if (mtx_trylock(&txq->lock) == 0)
852		return;
853	reclaim_completed_tx_(txq, 16);
854	mtx_unlock(&txq->lock);
855
856}
857
858static void
859sge_txq_reclaim_handler(void *arg, int ncount)
860{
861	struct sge_txq *q = arg;
862
863	sge_txq_reclaim_(q, TRUE);
864}
865
866
867
868static void
869sge_timer_reclaim(void *arg, int ncount)
870{
871	struct port_info *pi = arg;
872	int i, nqsets = pi->nqsets;
873	adapter_t *sc = pi->adapter;
874	struct sge_qset *qs;
875	struct sge_txq *txq;
876	struct mtx *lock;
877
878#ifdef IFNET_MULTIQUEUE
879	panic("%s should not be called with multiqueue support\n", __FUNCTION__);
880#endif
881	for (i = 0; i < nqsets; i++) {
882		qs = &sc->sge.qs[pi->first_qset + i];
883
884		txq = &qs->txq[TXQ_OFLD];
885		sge_txq_reclaim_(txq, FALSE);
886
887		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
888			    &sc->sge.qs[0].rspq.lock;
889
890		if (mtx_trylock(lock)) {
891			/* XXX currently assume that we are *NOT* polling */
892			uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
893
894			if (qs->fl[0].credits < qs->fl[0].size - 16)
895				__refill_fl(sc, &qs->fl[0]);
896			if (qs->fl[1].credits < qs->fl[1].size - 16)
897				__refill_fl(sc, &qs->fl[1]);
898
899			if (status & (1 << qs->rspq.cntxt_id)) {
900				if (qs->rspq.credits) {
901					refill_rspq(sc, &qs->rspq, 1);
902					qs->rspq.credits--;
903					t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
904					    1 << qs->rspq.cntxt_id);
905				}
906			}
907			mtx_unlock(lock);
908		}
909	}
910}
911
912/**
913 *	init_qset_cntxt - initialize an SGE queue set context info
914 *	@qs: the queue set
915 *	@id: the queue set id
916 *
917 *	Initializes the TIDs and context ids for the queues of a queue set.
918 */
919static void
920init_qset_cntxt(struct sge_qset *qs, u_int id)
921{
922
923	qs->rspq.cntxt_id = id;
924	qs->fl[0].cntxt_id = 2 * id;
925	qs->fl[1].cntxt_id = 2 * id + 1;
926	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
927	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
928	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
929	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
930	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
931
932	mbufq_init(&qs->txq[TXQ_ETH].sendq);
933	mbufq_init(&qs->txq[TXQ_OFLD].sendq);
934	mbufq_init(&qs->txq[TXQ_CTRL].sendq);
935}
936
937
938static void
939txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
940{
941	txq->in_use += ndesc;
942	/*
943	 * XXX we don't handle stopping of queue
944	 * presumably start handles this when we bump against the end
945	 */
946	txqs->gen = txq->gen;
947	txq->unacked += ndesc;
948	txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
949	txq->unacked &= 31;
950	txqs->pidx = txq->pidx;
951	txq->pidx += ndesc;
952#ifdef INVARIANTS
953	if (((txqs->pidx > txq->cidx) &&
954		(txq->pidx < txqs->pidx) &&
955		(txq->pidx >= txq->cidx)) ||
956	    ((txqs->pidx < txq->cidx) &&
957		(txq->pidx >= txq-> cidx)) ||
958	    ((txqs->pidx < txq->cidx) &&
959		(txq->cidx < txqs->pidx)))
960		panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
961		    txqs->pidx, txq->pidx, txq->cidx);
962#endif
963	if (txq->pidx >= txq->size) {
964		txq->pidx -= txq->size;
965		txq->gen ^= 1;
966	}
967
968}
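/*
 * Note on txq_prod(): the unacked/compl logic above requests a completion
 * from the hardware roughly once every 32 descriptors posted: bit 5 of the
 * running count is shifted into the WR_COMPL position, and the count is
 * then folded back into the range 0-31.
 */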
969
970/**
971 *	calc_tx_descs - calculate the number of Tx descriptors for a packet
972 *	@m: the packet mbufs
973 *      @nsegs: the number of segments
974 *
975 * 	Returns the number of Tx descriptors needed for the given Ethernet
976 * 	packet.  Ethernet packets require addition of WR and CPL headers.
977 */
978static __inline unsigned int
979calc_tx_descs(const struct mbuf *m, int nsegs)
980{
981	unsigned int flits;
982
983	if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
984		return 1;
985
986	flits = sgl_len(nsegs) + 2;
987#ifdef TSO_SUPPORTED
988	if (m->m_pkthdr.csum_flags & CSUM_TSO)
989		flits++;
990#endif
991	return flits_to_desc(flits);
992}
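/*
 * Worked example for calc_tx_descs(): a TSO packet mapped into 5 DMA
 * segments needs sgl_len(5) + 2 + 1 = 11 flits, and flits_to_desc(11)
 * yields a single descriptor with either flit_desc_map table.
 */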
993
994static unsigned int
995busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
996    struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
997{
998	struct mbuf *m0;
999	int err, pktlen, pass = 0;
1000
1001retry:
1002	err = 0;
1003	m0 = *m;
1004	pktlen = m0->m_pkthdr.len;
1005#if defined(__i386__) || defined(__amd64__)
1006	if (busdma_map_sg_collapse(m, segs, nsegs) == 0) {
1007		goto done;
1008	} else
1009#endif
1010		err = bus_dmamap_load_mbuf_sg(txq->entry_tag, txsd->map, m0, segs, nsegs, 0);
1011
1012	if (err == 0) {
1013		goto done;
1014	}
1015	if (err == EFBIG && pass == 0) {
1016		pass = 1;
1017		/* Too many segments, try to defrag */
1018		m0 = m_defrag(m0, M_DONTWAIT);
1019		if (m0 == NULL) {
1020			m_freem(*m);
1021			*m = NULL;
1022			return (ENOBUFS);
1023		}
1024		*m = m0;
1025		goto retry;
1026	} else if (err == ENOMEM) {
1027		return (err);
1028	} else if (err) {
1029		if (cxgb_debug)
1030			printf("map failure err=%d pktlen=%d\n", err, pktlen);
1031		m_freem(m0);
1032		*m = NULL;
1033		return (err);
1034	}
1035done:
1036#if !defined(__i386__) && !defined(__amd64__)
1037	bus_dmamap_sync(txq->entry_tag, txsd->map, BUS_DMASYNC_PREWRITE);
1038#endif
1039	txsd->flags |= TX_SW_DESC_MAPPED;
1040
1041	return (0);
1042}
1043
1044/**
1045 *	make_sgl - populate a scatter/gather list for a packet
1046 *	@sgp: the SGL to populate
1047 *	@segs: the packet dma segments
1048 *	@nsegs: the number of segments
1049 *
1050 *	Generates a scatter/gather list for the buffers that make up a packet
1051 *	and returns the SGL size in 8-byte words.  The caller must size the SGL
1052 *	appropriately.
1053 */
1054static __inline void
1055make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1056{
1057	int i, idx;
1058
1059	for (idx = 0, i = 0; i < nsegs; i++) {
1060		/*
1061		 * firmware doesn't like empty segments
1062		 */
1063		if (segs[i].ds_len == 0)
1064			continue;
1065		if (i && idx == 0)
1066			++sgp;
1067
1068		sgp->len[idx] = htobe32(segs[i].ds_len);
1069		sgp->addr[idx] = htobe64(segs[i].ds_addr);
1070		idx ^= 1;
1071	}
1072
1073	if (idx) {
1074		sgp->len[idx] = 0;
1075		sgp->addr[idx] = 0;
1076	}
1077}
1078
1079/**
1080 *	check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1081 *	@adap: the adapter
1082 *	@q: the Tx queue
1083 *
1084 *	Ring the doorbell if a Tx queue is asleep.  There is a natural race
1085 *	where the HW may go to sleep just after we check, but in that case
1086 *	the interrupt handler will detect the outstanding TX packet and ring
1087 *	the doorbell for us.
1088 *
1089 *	When GTS is disabled we unconditionally ring the doorbell.
1090 */
1091static __inline void
1092check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
1093{
1094#if USE_GTS
1095	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1096	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1097		set_bit(TXQ_LAST_PKT_DB, &q->flags);
1098#ifdef T3_TRACE
1099		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1100			  q->cntxt_id);
1101#endif
1102		t3_write_reg(adap, A_SG_KDOORBELL,
1103			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1104	}
1105#else
1106	wmb();            /* write descriptors before telling HW */
1107	t3_write_reg(adap, A_SG_KDOORBELL,
1108		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1109#endif
1110}
1111
1112static __inline void
1113wr_gen2(struct tx_desc *d, unsigned int gen)
1114{
1115#if SGE_NUM_GENBITS == 2
1116	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
1117#endif
1118}
1119
1120/**
1121 *	write_wr_hdr_sgl - write a WR header and, optionally, SGL
1122 *	@ndesc: number of Tx descriptors spanned by the SGL
1123 *	@txd: first Tx descriptor to be written
1124 *	@txqs: txq state (generation and producer index)
1125 *	@txq: the SGE Tx queue
1126 *	@sgl: the SGL
1127 *	@flits: number of flits to the start of the SGL in the first descriptor
1128 *	@sgl_flits: the SGL size in flits
1129 *	@wr_hi: top 32 bits of WR header based on WR type (big endian)
1130 *	@wr_lo: low 32 bits of WR header based on WR type (big endian)
1131 *
1132 *	Write a work request header and an associated SGL.  If the SGL is
1133 *	small enough to fit into one Tx descriptor it has already been written
1134 *	and we just need to write the WR header.  Otherwise we distribute the
1135 *	SGL across the number of descriptors it spans.
1136 */
1137static void
1138write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1139    const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1140    unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1141{
1142
1143	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1144	struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
1145
1146	if (__predict_true(ndesc == 1)) {
1147		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1148		    V_WR_SGLSFLT(flits)) | wr_hi;
1149		wmb();
1150		wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1151		    V_WR_GEN(txqs->gen)) | wr_lo;
1152		/* XXX gen? */
1153		wr_gen2(txd, txqs->gen);
1154
1155	} else {
1156		unsigned int ogen = txqs->gen;
1157		const uint64_t *fp = (const uint64_t *)sgl;
1158		struct work_request_hdr *wp = wrp;
1159
1160		wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1161		    V_WR_SGLSFLT(flits)) | wr_hi;
1162
1163		while (sgl_flits) {
1164			unsigned int avail = WR_FLITS - flits;
1165
1166			if (avail > sgl_flits)
1167				avail = sgl_flits;
1168			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1169			sgl_flits -= avail;
1170			ndesc--;
1171			if (!sgl_flits)
1172				break;
1173
1174			fp += avail;
1175			txd++;
1176			txsd++;
1177			if (++txqs->pidx == txq->size) {
1178				txqs->pidx = 0;
1179				txqs->gen ^= 1;
1180				txd = txq->desc;
1181				txsd = txq->sdesc;
1182			}
1183
1184			/*
1185			 * when the head of the mbuf chain
1186			 * is freed all clusters will be freed
1187			 * with it
1188			 */
1189			KASSERT(txsd->mi.mi_base == NULL,
1190			    ("overwriting valid entry mi_base==%p", txsd->mi.mi_base));
1191			wrp = (struct work_request_hdr *)txd;
1192			wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1193			    V_WR_SGLSFLT(1)) | wr_hi;
1194			wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1195				    sgl_flits + 1)) |
1196			    V_WR_GEN(txqs->gen)) | wr_lo;
1197			wr_gen2(txd, txqs->gen);
1198			flits = 1;
1199		}
1200		wrp->wr_hi |= htonl(F_WR_EOP);
1201		wmb();
1202		wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1203		wr_gen2((struct tx_desc *)wp, ogen);
1204	}
1205}
1206
1207/* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
1208#define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
1209
1210#ifdef VLAN_SUPPORTED
1211#define GET_VTAG(cntrl, m) \
1212do { \
1213	if ((m)->m_flags & M_VLANTAG)					            \
1214		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
1215} while (0)
1216
1217#define GET_VTAG_MI(cntrl, mi) \
1218do { \
1219	if ((mi)->mi_flags & M_VLANTAG)					\
1220		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((mi)->mi_ether_vtag); \
1221} while (0)
1222#else
1223#define GET_VTAG(cntrl, m)
1224#define GET_VTAG_MI(cntrl, m)
1225#endif
1226
1227int
1228t3_encap(struct sge_qset *qs, struct mbuf **m, int count)
1229{
1230	adapter_t *sc;
1231	struct mbuf *m0;
1232	struct sge_txq *txq;
1233	struct txq_state txqs;
1234	struct port_info *pi;
1235	unsigned int ndesc, flits, cntrl, mlen;
1236	int err, nsegs, tso_info = 0;
1237
1238	struct work_request_hdr *wrp;
1239	struct tx_sw_desc *txsd;
1240	struct sg_ent *sgp, *sgl;
1241	uint32_t wr_hi, wr_lo, sgl_flits;
1242	bus_dma_segment_t segs[TX_MAX_SEGS];
1243
1244	struct tx_desc *txd;
1245	struct mbuf_vec *mv;
1246	struct mbuf_iovec *mi;
1247
1248	DPRINTF("t3_encap cpu=%d ", curcpu);
1249
1250	mi = NULL;
1251	pi = qs->port;
1252	sc = pi->adapter;
1253	txq = &qs->txq[TXQ_ETH];
1254	txd = &txq->desc[txq->pidx];
1255	txsd = &txq->sdesc[txq->pidx];
1256	sgl = txq->txq_sgl;
1257	m0 = *m;
1258
1259	DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset);
1260	DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan);
1261	if (cxgb_debug)
1262		printf("mi_base=%p cidx=%d pidx=%d\n\n", txsd->mi.mi_base, txq->cidx, txq->pidx);
1263
1264	mtx_assert(&txq->lock, MA_OWNED);
1265	cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1266/*
1267 * XXX need to add VLAN support for 6.x
1268 */
1269#ifdef VLAN_SUPPORTED
1270	if  (m0->m_pkthdr.csum_flags & (CSUM_TSO))
1271		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1272#endif
1273	KASSERT(txsd->mi.mi_base == NULL,
1274	    ("overwriting valid entry mi_base==%p", txsd->mi.mi_base));
1275	if (count > 1) {
1276		if ((err = busdma_map_sg_vec(m, &m0, segs, count)))
1277			return (err);
1278		nsegs = count;
1279	} else if ((err = busdma_map_sg_collapse(&m0, segs, &nsegs))) {
1280		if (cxgb_debug)
1281			printf("failed ... err=%d\n", err);
1282		return (err);
1283	}
1284	KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d count=%d", nsegs, count));
1285
1286	if ((m0->m_pkthdr.len > PIO_LEN) || (count > 1)) {
1287		mi_collapse_mbuf(&txsd->mi, m0);
1288		mi = &txsd->mi;
1289	}
1290	if (count > 1) {
1291		struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
1292		int i, fidx;
1293		struct mbuf_iovec *batchmi;
1294
1295		mv = mtomv(m0);
1296		batchmi = mv->mv_vec;
1297
1298		wrp = (struct work_request_hdr *)txd;
1299
1300		flits = count*2 + 1;
1301		txq_prod(txq, 1, &txqs);
1302
1303		for (fidx = 1, i = 0; i < count; i++, batchmi++, fidx += 2) {
1304			struct cpl_tx_pkt_batch_entry *cbe = &cpl_batch->pkt_entry[i];
1305
1306			cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1307			GET_VTAG_MI(cntrl, batchmi);
1308			cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1309			if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
1310				cntrl |= F_TXPKT_IPCSUM_DIS;
1311			if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
1312				cntrl |= F_TXPKT_L4CSUM_DIS;
1313			cbe->cntrl = htonl(cntrl);
1314			cbe->len = htonl(batchmi->mi_len | 0x80000000);
1315			cbe->addr = htobe64(segs[i].ds_addr);
1316			txd->flit[fidx] |= htobe64(1 << 24);
1317		}
1318
1319		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1320		    V_WR_SGLSFLT(flits)) | htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1321		wmb();
1322		wrp->wr_lo = htonl(V_WR_LEN(flits) |
1323		    V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
1324		/* XXX gen? */
1325		wr_gen2(txd, txqs.gen);
1326		check_ring_tx_db(sc, txq);
1327
1328		return (0);
1329	} else if (tso_info) {
1330		int min_size = TCPPKTHDRSIZE, eth_type, tagged;
1331		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
1332		struct ip *ip;
1333		struct tcphdr *tcp;
1334		char *pkthdr;
1335
1336		txd->flit[2] = 0;
1337		GET_VTAG(cntrl, m0);
1338		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1339		hdr->cntrl = htonl(cntrl);
1340		mlen = m0->m_pkthdr.len;
1341		hdr->len = htonl(mlen | 0x80000000);
1342
1343		DPRINTF("tso buf len=%d\n", mlen);
1344
1345		tagged = m0->m_flags & M_VLANTAG;
1346		if (!tagged)
1347			min_size -= ETHER_VLAN_ENCAP_LEN;
1348
1349		if (__predict_false(mlen < min_size)) {
1350			printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
1351			    m0, mlen, m0->m_pkthdr.tso_segsz,
1352			    m0->m_pkthdr.csum_flags, m0->m_flags);
1353			panic("tx tso packet too small");
1354		}
1355
1356		/* Make sure that ether, ip, tcp headers are all in m0 */
1357		if (__predict_false(m0->m_len < min_size)) {
1358			m0 = m_pullup(m0, min_size);
1359			if (__predict_false(m0 == NULL)) {
1360				/* XXX panic probably an overreaction */
1361				panic("couldn't fit header into mbuf");
1362			}
1363		}
1364		pkthdr = m0->m_data;
1365
1366		if (tagged) {
1367			eth_type = CPL_ETH_II_VLAN;
1368			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
1369			    ETHER_VLAN_ENCAP_LEN);
1370		} else {
1371			eth_type = CPL_ETH_II;
1372			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
1373		}
1374		tcp = (struct tcphdr *)((uint8_t *)ip +
1375		    sizeof(*ip));
1376
1377		tso_info |= V_LSO_ETH_TYPE(eth_type) |
1378			    V_LSO_IPHDR_WORDS(ip->ip_hl) |
1379			    V_LSO_TCPHDR_WORDS(tcp->th_off);
1380		hdr->lso_info = htonl(tso_info);
1381
1382		if (__predict_false(mlen <= PIO_LEN)) {
1383			/*
1384			 * The packet is not undersized but it still fits in PIO_LEN,
1385			 * which indicates a TSO bug at the higher layers.
1386			 */
1387			DPRINTF("**5592 Fix** mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
1388			    m0, mlen, m0->m_pkthdr.tso_segsz, m0->m_pkthdr.csum_flags, m0->m_flags);
1389			txq_prod(txq, 1, &txqs);
1390			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
1391			m_freem(m0);
1392			m0 = NULL;
1393			flits = (mlen + 7) / 8 + 3;
1394			hdr->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1395					  V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1396					  F_WR_SOP | F_WR_EOP | txqs.compl);
1397			wmb();
1398			hdr->wr.wr_lo = htonl(V_WR_LEN(flits) |
1399			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1400
1401			wr_gen2(txd, txqs.gen);
1402			check_ring_tx_db(sc, txq);
1403			return (0);
1404		}
1405		flits = 3;
1406	} else {
1407		struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
1408
1409		GET_VTAG(cntrl, m0);
1410		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1411		if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
1412			cntrl |= F_TXPKT_IPCSUM_DIS;
1413		if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
1414			cntrl |= F_TXPKT_L4CSUM_DIS;
1415		cpl->cntrl = htonl(cntrl);
1416		mlen = m0->m_pkthdr.len;
1417		cpl->len = htonl(mlen | 0x80000000);
1418
1419		if (mlen <= PIO_LEN) {
1420			txq_prod(txq, 1, &txqs);
1421			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1422			m_freem(m0);
1423			m0 = NULL;
1424			flits = (mlen + 7) / 8 + 2;
1425			cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1426					  V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1427					  F_WR_SOP | F_WR_EOP | txqs.compl);
1428			wmb();
1429			cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
1430			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1431
1432			wr_gen2(txd, txqs.gen);
1433			check_ring_tx_db(sc, txq);
1434			DPRINTF("pio buf\n");
1435			return (0);
1436		}
1437		DPRINTF("regular buf\n");
1438		flits = 2;
1439	}
1440	wrp = (struct work_request_hdr *)txd;
1441
1442#ifdef	nomore
1443	/*
1444	 * XXX need to move into one of the helper routines above
1445	 *
1446	 */
1447	if ((err = busdma_map_mbufs(m, txq, txsd, segs, &nsegs)) != 0)
1448		return (err);
1449	m0 = *m;
1450#endif
1451	ndesc = calc_tx_descs(m0, nsegs);
1452
1453	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1454	make_sgl(sgp, segs, nsegs);
1455
1456	sgl_flits = sgl_len(nsegs);
1457
1458	DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
1459	txq_prod(txq, ndesc, &txqs);
1460	wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1461	wr_lo = htonl(V_WR_TID(txq->token));
1462	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
1463	check_ring_tx_db(pi->adapter, txq);
1464
1465	if ((m0->m_type == MT_DATA) &&
1466	    ((m0->m_flags & (M_EXT|M_NOFREE)) == M_EXT) &&
1467	    (m0->m_ext.ext_type != EXT_PACKET)) {
1468		m0->m_flags &= ~M_EXT ;
1469		cxgb_mbufs_outstanding--;
1470		m_free(m0);
1471	}
1472
1473	return (0);
1474}
1475
1476
1477/**
1478 *	write_imm - write a packet into a Tx descriptor as immediate data
1479 *	@d: the Tx descriptor to write
1480 *	@m: the packet
1481 *	@len: the length of packet data to write as immediate data
1482 *	@gen: the generation bit value to write
1483 *
1484 *	Writes a packet as immediate data into a Tx descriptor.  The packet
1485 *	contains a work request at its beginning.  We must write the packet
1486 *	carefully so the SGE doesn't read accidentally before it's written in
1487 *	its entirety.
1488 */
1489static __inline void
1490write_imm(struct tx_desc *d, struct mbuf *m,
1491	  unsigned int len, unsigned int gen)
1492{
1493	struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
1494	struct work_request_hdr *to = (struct work_request_hdr *)d;
1495
1496	if (len > WR_LEN)
1497		panic("len too big %d\n", len);
1498	if (len < sizeof(*from))
1499		panic("len too small %d", len);
1500
1501	memcpy(&to[1], &from[1], len - sizeof(*from));
1502	to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1503					V_WR_BCNTLFLT(len & 7));
1504	wmb();
1505	to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1506					V_WR_LEN((len + 7) / 8));
1507	wr_gen2(d, gen);
1508
1509	/*
1510	 * This check is a hack we should really fix the logic so
1511	 * that this can't happen
1512	 */
1513	if (m->m_type != MT_DONTFREE)
1514		m_freem(m);
1515
1516}
1517
1518/**
1519 *	check_desc_avail - check descriptor availability on a send queue
1520 *	@adap: the adapter
1521 *	@q: the TX queue
1522 *	@m: the packet needing the descriptors
1523 *	@ndesc: the number of Tx descriptors needed
1524 *	@qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1525 *
1526 *	Checks if the requested number of Tx descriptors is available on an
1527 *	SGE send queue.  If the queue is already suspended or not enough
1528 *	descriptors are available the packet is queued for later transmission.
1529 *	Must be called with the Tx queue locked.
1530 *
1531 *	Returns 0 if enough descriptors are available, 1 if there aren't
1532 *	enough descriptors and the packet has been queued, and 2 if the caller
1533 *	needs to retry because there weren't enough descriptors at the
1534 *	beginning of the call but some freed up in the meantime.
1535 */
1536static __inline int
1537check_desc_avail(adapter_t *adap, struct sge_txq *q,
1538		 struct mbuf *m, unsigned int ndesc,
1539		 unsigned int qid)
1540{
1541	/*
1542	 * XXX We currently only use this for checking the control queue
1543	 * the control queue is only used for binding qsets which happens
1544	 * at init time so we are guaranteed enough descriptors
1545	 */
1546	if (__predict_false(!mbufq_empty(&q->sendq))) {
1547addq_exit:	mbufq_tail(&q->sendq, m);
1548		return 1;
1549	}
1550	if (__predict_false(q->size - q->in_use < ndesc)) {
1551
1552		struct sge_qset *qs = txq_to_qset(q, qid);
1553
1554		printf("stopping q\n");
1555
1556		setbit(&qs->txq_stopped, qid);
1557		smp_mb();
1558
1559		if (should_restart_tx(q) &&
1560		    test_and_clear_bit(qid, &qs->txq_stopped))
1561			return 2;
1562
1563		q->stops++;
1564		goto addq_exit;
1565	}
1566	return 0;
1567}
1568
1569
1570/**
1571 *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1572 *	@q: the SGE control Tx queue
1573 *
1574 *	This is a variant of reclaim_completed_tx() that is used for Tx queues
1575 *	that send only immediate data (presently just the control queues) and
1576 *	thus do not have any mbufs
1577 */
1578static __inline void
1579reclaim_completed_tx_imm(struct sge_txq *q)
1580{
1581	unsigned int reclaim = q->processed - q->cleaned;
1582
1583	mtx_assert(&q->lock, MA_OWNED);
1584
1585	q->in_use -= reclaim;
1586	q->cleaned += reclaim;
1587}
1588
1589static __inline int
1590immediate(const struct mbuf *m)
1591{
1592	return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN;
1593}
1594
1595/**
1596 *	ctrl_xmit - send a packet through an SGE control Tx queue
1597 *	@adap: the adapter
1598 *	@q: the control queue
1599 *	@m: the packet
1600 *
1601 *	Send a packet through an SGE control Tx queue.  Packets sent through
1602 *	a control queue must fit entirely as immediate data in a single Tx
1603 *	descriptor and have no page fragments.
1604 */
1605static int
1606ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1607{
1608	int ret;
1609	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1610
1611	if (__predict_false(!immediate(m))) {
1612		m_freem(m);
1613		return 0;
1614	}
1615
1616	wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1617	wrp->wr_lo = htonl(V_WR_TID(q->token));
1618
1619	mtx_lock(&q->lock);
1620again:	reclaim_completed_tx_imm(q);
1621
1622	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1623	if (__predict_false(ret)) {
1624		if (ret == 1) {
1625			mtx_unlock(&q->lock);
1626			log(LOG_ERR, "no desc available\n");
1627			return (ENOSPC);
1628		}
1629		goto again;
1630	}
1631	write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1632
1633	q->in_use++;
1634	if (++q->pidx >= q->size) {
1635		q->pidx = 0;
1636		q->gen ^= 1;
1637	}
1638	mtx_unlock(&q->lock);
1639	wmb();
1640	t3_write_reg(adap, A_SG_KDOORBELL,
1641		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1642	return (0);
1643}
1644
1645
1646/**
1647 *	restart_ctrlq - restart a suspended control queue
1648 *	@data: the queue set containing the control queue
1649 *
1650 *	Resumes transmission on a suspended Tx control queue.
1651 */
1652static void
1653restart_ctrlq(void *data, int npending)
1654{
1655	struct mbuf *m;
1656	struct sge_qset *qs = (struct sge_qset *)data;
1657	struct sge_txq *q = &qs->txq[TXQ_CTRL];
1658	adapter_t *adap = qs->port->adapter;
1659
1660	log(LOG_WARNING, "Restart_ctrlq in_use=%d\n", q->in_use);
1661
1662	mtx_lock(&q->lock);
1663again:	reclaim_completed_tx_imm(q);
1664
1665	while (q->in_use < q->size &&
1666	       (m = mbufq_dequeue(&q->sendq)) != NULL) {
1667
1668		write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1669
1670		if (++q->pidx >= q->size) {
1671			q->pidx = 0;
1672			q->gen ^= 1;
1673		}
1674		q->in_use++;
1675	}
1676	if (!mbufq_empty(&q->sendq)) {
1677		setbit(&qs->txq_stopped, TXQ_CTRL);
1678		smp_mb();
1679
1680		if (should_restart_tx(q) &&
1681		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1682			goto again;
1683		q->stops++;
1684	}
1685	mtx_unlock(&q->lock);
1686	wmb();
1687	t3_write_reg(adap, A_SG_KDOORBELL,
1688		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1689}
1690
1691
1692/*
1693 * Send a management message through control queue 0
1694 */
1695int
1696t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1697{
1698	return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
1699}
1700
1701
1702/**
1703 *	free_qset - free the resources of an SGE queue set
1704 *	@sc: the controller owning the queue set
1705 *	@q: the queue set
1706 *
1707 *	Release the HW and SW resources associated with an SGE queue set, such
1708 *	as HW contexts, packet buffers, and descriptor rings.  Traffic to the
1709 *	queue set must be quiesced prior to calling this.
1710 */
1711void
1712t3_free_qset(adapter_t *sc, struct sge_qset *q)
1713{
1714	int i;
1715
1716	t3_free_tx_desc_all(&q->txq[TXQ_ETH]);
1717
1718	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
1719		if (q->txq[i].txq_mr != NULL)
1720			buf_ring_free(q->txq[i].txq_mr, M_DEVBUF);
1721		if (q->txq[i].txq_ifq != NULL) {
1722			ifq_delete(q->txq[i].txq_ifq);
1723			free(q->txq[i].txq_ifq, M_DEVBUF);
1724		}
1725	}
1726
1727	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
1728		if (q->fl[i].desc) {
1729			mtx_lock_spin(&sc->sge.reg_lock);
1730			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
1731			mtx_unlock_spin(&sc->sge.reg_lock);
1732			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
1733			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
1734					q->fl[i].desc_map);
1735			bus_dma_tag_destroy(q->fl[i].desc_tag);
1736			bus_dma_tag_destroy(q->fl[i].entry_tag);
1737		}
1738		if (q->fl[i].sdesc) {
1739			free_rx_bufs(sc, &q->fl[i]);
1740			free(q->fl[i].sdesc, M_DEVBUF);
1741		}
1742	}
1743
1744	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
1745		if (q->txq[i].desc) {
1746			mtx_lock_spin(&sc->sge.reg_lock);
1747			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
1748			mtx_unlock_spin(&sc->sge.reg_lock);
1749			bus_dmamap_unload(q->txq[i].desc_tag,
1750					q->txq[i].desc_map);
1751			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
1752					q->txq[i].desc_map);
1753			bus_dma_tag_destroy(q->txq[i].desc_tag);
1754			bus_dma_tag_destroy(q->txq[i].entry_tag);
1755			MTX_DESTROY(&q->txq[i].lock);
1756		}
1757		if (q->txq[i].sdesc) {
1758			free(q->txq[i].sdesc, M_DEVBUF);
1759		}
1760	}
1761
1762	if (q->rspq.desc) {
1763		mtx_lock_spin(&sc->sge.reg_lock);
1764		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
1765		mtx_unlock_spin(&sc->sge.reg_lock);
1766
1767		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
1768		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
1769			        q->rspq.desc_map);
1770		bus_dma_tag_destroy(q->rspq.desc_tag);
1771		MTX_DESTROY(&q->rspq.lock);
1772	}
1773
1774#ifdef LRO_SUPPORTED
1775	tcp_lro_free(&q->lro.ctrl);
1776#endif
1777
1778	bzero(q, sizeof(*q));
1779}
1780
1781/**
1782 *	t3_free_sge_resources - free SGE resources
1783 *	@sc: the adapter softc
1784 *
1785 *	Frees resources used by the SGE queue sets.
1786 */
1787void
1788t3_free_sge_resources(adapter_t *sc)
1789{
1790	int i, nqsets;
1791
1792#ifdef IFNET_MULTIQUEUE
1793	panic("%s should not be called when IFNET_MULTIQUEUE is defined", __FUNCTION__);
1794#endif
1795	for (nqsets = i = 0; i < (sc)->params.nports; i++)
1796		nqsets += sc->port[i].nqsets;
1797
1798	for (i = 0; i < nqsets; ++i)
1799		t3_free_qset(sc, &sc->sge.qs[i]);
1800}
1801
1802/**
1803 *	t3_sge_start - enable SGE
1804 *	@sc: the controller softc
1805 *
1806 *	Enables the SGE for DMAs.  This is the last step in starting packet
1807 *	transfers.
1808 */
1809void
1810t3_sge_start(adapter_t *sc)
1811{
1812	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
1813}
1814
1815/**
1816 *	t3_sge_stop - disable SGE operation
1817 *	@sc: the adapter
1818 *
1819 *	Disables the DMA engine.  This can be called in emergencies (e.g.,
1820 *	from error interrupts) or from normal process context.  In the latter
1821 *	case it also disables any pending queue restart tasklets.  Note that
1822 *	if it is called in interrupt context it cannot disable the restart
1823 *	tasklets as it cannot wait, however the tasklets will have no effect
1824 *	since the doorbells are disabled and the driver will call this again
1825 *	later from process context, at which time the tasklets will be stopped
1826 *	if they are still running.
1827 */
1828void
1829t3_sge_stop(adapter_t *sc)
1830{
1831	int i, nqsets;
1832
1833	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
1834
1835	if (sc->tq == NULL)
1836		return;
1837
1838	for (nqsets = i = 0; i < (sc)->params.nports; i++)
1839		nqsets += sc->port[i].nqsets;
1840#ifdef notyet
1841	/*
1842	 *
1843	 * XXX
1844	 */
1845	for (i = 0; i < nqsets; ++i) {
1846		struct sge_qset *qs = &sc->sge.qs[i];
1847
1848		taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
1849		taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
1850	}
1851#endif
1852}
1853
1854/**
1855 *	t3_free_tx_desc - reclaims Tx descriptors and their buffers
1856 *	@q: the Tx queue to reclaim descriptors from
1857 *	@reclaimable: the number of descriptors to reclaim
1858 *
1859 *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
1860 *	Tx buffers.  Called with the Tx queue lock held.
1861 *
1862 *	Descriptors that carry no software state (mi.mi_base == NULL) are
1863 *	simply skipped and counted in txq_skipped; mapped buffers have their
1864 *	DMA maps unloaded before the mbuf iovec is freed.
1865 *
1866 */
1867void
1868t3_free_tx_desc(struct sge_txq *q, int reclaimable)
1869{
1870	struct tx_sw_desc *txsd;
1871	unsigned int cidx;
1872
1873#ifdef T3_TRACE
1874	T3_TRACE2(sc->tb[q->cntxt_id & 7],
1875		  "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
1876#endif
1877	cidx = q->cidx;
1878	txsd = &q->sdesc[cidx];
1879	DPRINTF("reclaiming %d WR\n", reclaimable);
1880	mtx_assert(&q->lock, MA_OWNED);
1881	while (reclaimable--) {
1882		DPRINTF("cidx=%d d=%p\n", cidx, txsd);
1883		if (txsd->mi.mi_base != NULL) {
1884			if (txsd->flags & TX_SW_DESC_MAPPED) {
1885				bus_dmamap_unload(q->entry_tag, txsd->map);
1886				txsd->flags &= ~TX_SW_DESC_MAPPED;
1887			}
1888			m_freem_iovec(&txsd->mi);
1889#if 0
1890			buf_ring_scan(&q->txq_mr, txsd->mi.mi_base, __FILE__, __LINE__);
1891#endif
1892			txsd->mi.mi_base = NULL;
1893		} else
1894			q->txq_skipped++;
1895
1896		++txsd;
1897		if (++cidx == q->size) {
1898			cidx = 0;
1899			txsd = q->sdesc;
1900		}
1901	}
1902	q->cidx = cidx;
1903
1904}
1905
1906void
1907t3_free_tx_desc_all(struct sge_txq *q)
1908{
1909	int i;
1910	struct tx_sw_desc *txsd;
1911
1912	for (i = 0; i < q->size; i++) {
1913		txsd = &q->sdesc[i];
1914		if (txsd->mi.mi_base != NULL) {
1915			if (txsd->flags & TX_SW_DESC_MAPPED) {
1916				bus_dmamap_unload(q->entry_tag, txsd->map);
1917				txsd->flags &= ~TX_SW_DESC_MAPPED;
1918			}
1919			m_freem_iovec(&txsd->mi);
1920			bzero(&txsd->mi, sizeof(txsd->mi));
1921		}
1922	}
1923}
1924
1925/**
1926 *	is_new_response - check if a response is newly written
1927 *	@r: the response descriptor
1928 *	@q: the response queue
1929 *
1930 *	Returns true if a response descriptor contains a yet unprocessed
1931 *	response.
1932 */
1933static __inline int
1934is_new_response(const struct rsp_desc *r,
1935    const struct sge_rspq *q)
1936{
1937	return (r->intr_gen & F_RSPD_GEN2) == q->gen;
1938}
1939
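/*
 * Response-ring generation bit, a descriptive sketch (not additional driver
 * code): q->gen starts at 1 and is flipped every time the consumer index
 * wraps, so a response descriptor is "new" exactly when the generation bit
 * the hardware wrote into it matches the queue's current generation, which
 * is what is_new_response() tests.  A minimal consumer loop therefore looks
 * like:
 */
#if 0
	while (is_new_response(r, q)) {
		/* ... process *r ... */
		if (++q->cidx == q->size) {
			q->cidx = 0;
			q->gen ^= 1;	/* expect the opposite bit next lap */
		}
		r = &q->desc[q->cidx];
	}
#endif
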
1940#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1941#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1942			V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
1943			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
1944			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
1945
1946/* How long to delay the next interrupt in case of memory shortage, in units of 0.1 us. */
1947#define NOMEM_INTR_DELAY 2500
1948
1949/**
1950 *	write_ofld_wr - write an offload work request
1951 *	@adap: the adapter
1952 *	@m: the packet to send
1953 *	@q: the Tx queue
1954 *	@pidx: index of the first Tx descriptor to write
1955 *	@gen: the generation value to use
1956 *	@ndesc: number of descriptors the packet will occupy
1957 *
1958 *	Write an offload work request to send the supplied packet.  The packet
1959 *	data already carry the work request with most fields populated.
1960 */
1961static void
1962write_ofld_wr(adapter_t *adap, struct mbuf *m,
1963    struct sge_txq *q, unsigned int pidx,
1964    unsigned int gen, unsigned int ndesc,
1965    bus_dma_segment_t *segs, unsigned int nsegs)
1966{
1967	unsigned int sgl_flits, flits;
1968	struct work_request_hdr *from;
1969	struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
1970	struct tx_desc *d = &q->desc[pidx];
1971	struct txq_state txqs;
1972
1973	if (immediate(m) && nsegs == 0) {
1974		write_imm(d, m, m->m_len, gen);
1975		return;
1976	}
1977
1978	/* Only TX_DATA builds SGLs */
1979	from = mtod(m, struct work_request_hdr *);
1980	memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
1981
1982	flits = m->m_len / 8;
1983	sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
1984
1985	make_sgl(sgp, segs, nsegs);
1986	sgl_flits = sgl_len(nsegs);
1987
1988	txqs.gen = gen;
1989	txqs.pidx = pidx;
1990	txqs.compl = 0;
1991
1992	write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
1993	    from->wr_hi, from->wr_lo);
1994}
1995
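/*
 * Descriptor layout note for write_ofld_wr() above (descriptive only, not an
 * independent specification): for a non-immediate offload WR the mbuf data
 * already starts with a struct work_request_hdr; everything after that header
 * is copied into flits 1..(m->m_len / 8 - 1) of the first descriptor, and the
 * scatter/gather list follows.  When the whole request fits in one descriptor
 * (ndesc == 1) the SGL is built in place right after the copied flits,
 * otherwise it is staged in sgl[] and written out across descriptors by
 * write_wr_hdr_sgl(), which also stamps wr_hi/wr_lo and the generation bit
 * into each descriptor it touches.
 */
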
1996/**
1997 *	calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1998 *	@m: the packet
1999 *
2000 * 	Returns the number of Tx descriptors needed for the given offload
2001 * 	packet.  These packets are already fully constructed.
2002 */
2003static __inline unsigned int
2004calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
2005{
2006	unsigned int flits, cnt = 0;
2007	int ndescs;
2008
2009	if (m->m_len <= WR_LEN && nsegs == 0)
2010		return (1);                 /* packet fits as immediate data */
2011
2012	if (m->m_flags & M_IOVEC)
2013		cnt = mtomv(m)->mv_count;
2014	else
2015		cnt = nsegs;
2016
2017	/* headers */
2018	flits = m->m_len / 8;
2019
2020	ndescs = flits_to_desc(flits + sgl_len(cnt));
2021
2022	return (ndescs);
2023}
2024
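/*
 * Worked example for calc_tx_descs_ofld() (a sketch; the exact mapping from
 * flits to descriptors comes from the flits_to_desc() table defined earlier
 * in this file): a WR carrying 40 bytes of headers contributes 40 / 8 = 5
 * header flits; a payload mapped into 3 DMA segments adds sgl_len(3) more
 * flits for the scatter/gather list, since each sg_ent packs two
 * address/length pairs into three flits; flits_to_desc(5 + sgl_len(3)) is
 * then the number of Tx descriptors the request will occupy.
 */
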
2025/**
2026 *	ofld_xmit - send a packet through an offload queue
2027 *	@adap: the adapter
2028 *	@q: the Tx offload queue
2029 *	@m: the packet
2030 *
2031 *	Send an offload packet through an SGE offload queue.
2032 */
2033static int
2034ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
2035{
2036	int ret, nsegs;
2037	unsigned int ndesc;
2038	unsigned int pidx, gen;
2039	bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
2040	struct tx_sw_desc *stx;
2041
2042	nsegs = m_get_sgllen(m);
2043	vsegs = m_get_sgl(m);
2044	ndesc = calc_tx_descs_ofld(m, nsegs);
2045	busdma_map_sgl(vsegs, segs, nsegs);
2046
2047	stx = &q->sdesc[q->pidx];
2048	KASSERT(stx->mi.mi_base == NULL, ("mi_base set"));
2049
2050	mtx_lock(&q->lock);
2051again:	reclaim_completed_tx_(q, 16);
2052	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
2053	if (__predict_false(ret)) {
2054		if (ret == 1) {
2055			printf("no ofld desc avail\n");
2056
2057			m_set_priority(m, ndesc);     /* save for restart */
2058			mtx_unlock(&q->lock);
2059			return (EINTR);
2060		}
2061		goto again;
2062	}
2063
2064	gen = q->gen;
2065	q->in_use += ndesc;
2066	pidx = q->pidx;
2067	q->pidx += ndesc;
2068	if (q->pidx >= q->size) {
2069		q->pidx -= q->size;
2070		q->gen ^= 1;
2071	}
2072#ifdef T3_TRACE
2073	T3_TRACE5(adap->tb[q->cntxt_id & 7],
2074		  "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
2075		  ndesc, pidx, skb->len, skb->len - skb->data_len,
2076		  skb_shinfo(skb)->nr_frags);
2077#endif
2078	mtx_unlock(&q->lock);
2079
2080	write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2081	check_ring_tx_db(adap, q);
2082	return (0);
2083}
2084
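/*
 * Reservation note (descriptive only): the pidx/gen update in ofld_xmit()
 * above is the pattern every Tx ring in this file uses to claim descriptors.
 * ndesc contiguous entries are reserved under q->lock by advancing the
 * software producer index; when pidx wraps past q->size the generation bit
 * is flipped, so work requests written on the next lap around the ring can
 * be distinguished from stale ones left over from the previous lap.
 */
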
2085/**
2086 *	restart_offloadq - restart a suspended offload queue
2087 *	@qs: the queue set containing the offload queue
2088 *
2089 *	Resumes transmission on a suspended Tx offload queue.
2090 */
2091static void
2092restart_offloadq(void *data, int npending)
2093{
2094	struct mbuf *m;
2095	struct sge_qset *qs = data;
2096	struct sge_txq *q = &qs->txq[TXQ_OFLD];
2097	adapter_t *adap = qs->port->adapter;
2098	bus_dma_segment_t segs[TX_MAX_SEGS];
2099	struct tx_sw_desc *stx = &q->sdesc[q->pidx];
2100	int nsegs, cleaned;
2101
2102	mtx_lock(&q->lock);
2103again:	cleaned = reclaim_completed_tx_(q, 16);
2104
2105	while ((m = mbufq_peek(&q->sendq)) != NULL) {
2106		unsigned int gen, pidx;
2107		unsigned int ndesc = m_get_priority(m);
2108
2109		if (__predict_false(q->size - q->in_use < ndesc)) {
2110			setbit(&qs->txq_stopped, TXQ_OFLD);
2111			smp_mb();
2112
2113			if (should_restart_tx(q) &&
2114			    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
2115				goto again;
2116			q->stops++;
2117			break;
2118		}
2119
2120		gen = q->gen;
2121		q->in_use += ndesc;
2122		pidx = q->pidx;
2123		q->pidx += ndesc;
2124		if (q->pidx >= q->size) {
2125			q->pidx -= q->size;
2126			q->gen ^= 1;
2127		}
2128
2129		(void)mbufq_dequeue(&q->sendq);
2130		busdma_map_mbufs(&m, q, stx, segs, &nsegs);
2131		mtx_unlock(&q->lock);
2132		write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2133		mtx_lock(&q->lock);
2134	}
2135	mtx_unlock(&q->lock);
2136
2137#if USE_GTS
2138	set_bit(TXQ_RUNNING, &q->flags);
2139	set_bit(TXQ_LAST_PKT_DB, &q->flags);
2140#endif
2141	wmb();
2142	t3_write_reg(adap, A_SG_KDOORBELL,
2143		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2144}
2145
2146/**
2147 *	queue_set - return the queue set a packet should use
2148 *	@m: the packet
2149 *
2150 *	Maps a packet to the SGE queue set it should use.  The desired queue
2151 *	set is carried in bits 1-3 in the packet's priority.
2152 */
2153static __inline int
2154queue_set(const struct mbuf *m)
2155{
2156	return m_get_priority(m) >> 1;
2157}
2158
2159/**
2160 *	is_ctrl_pkt - return whether an offload packet is a control packet
2161 *	@m: the packet
2162 *
2163 *	Determines whether an offload packet should use an OFLD or a CTRL
2164 *	Tx queue.  This is indicated by bit 0 in the packet's priority.
2165 */
2166static __inline int
2167is_ctrl_pkt(const struct mbuf *m)
2168{
2169	return m_get_priority(m) & 1;
2170}
2171
2172/**
2173 *	t3_offload_tx - send an offload packet
2174 *	@tdev: the offload device to send to
2175 *	@m: the packet
2176 *
2177 *	Sends an offload packet.  We use the packet priority to select the
2178 *	appropriate Tx queue as follows: bit 0 indicates whether the packet
2179 *	should be sent as regular or control, bits 1-3 select the queue set.
2180 */
2181int
2182t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
2183{
2184	adapter_t *adap = tdev2adap(tdev);
2185	struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
2186
2187	if (__predict_false(is_ctrl_pkt(m)))
2188		return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m);
2189
2190	return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m);
2191}
2192
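/*
 * Priority encoding sketch (illustrative; the variable names below are made
 * up for the example): callers of t3_offload_tx() pack the control/data
 * selector into bit 0 of the mbuf priority and the destination queue set
 * into bits 1-3, which queue_set() and is_ctrl_pkt() then decode:
 */
#if 0
	/* encode: queue set in bits 1-3, control flag in bit 0 */
	m_set_priority(m, (qset_idx << 1) | (is_ctrl ? 1 : 0));

	/* decode, as done by queue_set() and is_ctrl_pkt() */
	qset_idx = m_get_priority(m) >> 1;
	is_ctrl  = m_get_priority(m) & 1;
#endif
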
2193/**
2194 *	deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
2195 *	@tdev: the offload device that will be receiving the packets
2196 *	@q: the SGE response queue that assembled the bundle
2197 *	@m: the partial bundle
2198 *	@n: the number of packets in the bundle
2199 *
2200 *	Delivers a (partial) bundle of Rx offload packets to an offload device.
2201 */
2202static __inline void
2203deliver_partial_bundle(struct t3cdev *tdev,
2204			struct sge_rspq *q,
2205			struct mbuf *mbufs[], int n)
2206{
2207	if (n) {
2208		q->offload_bundles++;
2209		cxgb_ofld_recv(tdev, mbufs, n);
2210	}
2211}
2212
2213static __inline int
2214rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
2215    struct mbuf *m, struct mbuf *rx_gather[],
2216    unsigned int gather_idx)
2217{
2218
2219	rq->offload_pkts++;
2220	m->m_pkthdr.header = mtod(m, void *);
2221	rx_gather[gather_idx++] = m;
2222	if (gather_idx == RX_BUNDLE_SIZE) {
2223		cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
2224		gather_idx = 0;
2225		rq->offload_bundles++;
2226	}
2227	return (gather_idx);
2228}
2229
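/*
 * Bundling note (descriptive only): rx_offload() batches incoming offload
 * packets in rx_gather[] and hands them to the TOE only once RX_BUNDLE_SIZE
 * of them have accumulated; whatever remains at the end of a response
 * processing pass is flushed by deliver_partial_bundle(), so the per-call
 * cost of cxgb_ofld_recv() is amortized over the bundle.
 */
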
2230static void
2231restart_tx(struct sge_qset *qs)
2232{
2233	struct adapter *sc = qs->port->adapter;
2234
2235
2236	if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2237	    should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2238	    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2239		qs->txq[TXQ_OFLD].restarts++;
2240		DPRINTF("restarting TXQ_OFLD\n");
2241		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2242	}
2243	DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
2244	    qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
2245	    qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
2246	    qs->txq[TXQ_CTRL].in_use);
2247
2248	if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2249	    should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2250	    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2251		qs->txq[TXQ_CTRL].restarts++;
2252		DPRINTF("restarting TXQ_CTRL\n");
2253		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2254	}
2255}
2256
2257/**
2258 *	t3_sge_alloc_qset - initialize an SGE queue set
2259 *	@sc: the controller softc
2260 *	@id: the queue set id
2261 *	@nports: how many Ethernet ports will be using this queue set
2262 *	@irq_vec_idx: the IRQ vector index for response queue interrupts
2263 *	@p: configuration parameters for this queue set
2264 *	@ntxq: number of Tx queues for the queue set
2265 *	@pi: port info for queue set
2266 *
2267 *	Allocate resources and initialize an SGE queue set.  A queue set
2268 *	comprises a response queue, two Rx free-buffer queues, and up to 3
2269 *	Tx queues.  The Tx queues are assigned roles in the order Ethernet
2270 *	queue, offload queue, and control queue.
2271 */
2272int
2273t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2274		  const struct qset_params *p, int ntxq, struct port_info *pi)
2275{
2276	struct sge_qset *q = &sc->sge.qs[id];
2277	int i, header_size, ret = 0;
2278
2279	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
2280
2281		if ((q->txq[i].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
2282			    M_DEVBUF, M_WAITOK, &q->txq[i].lock)) == NULL) {
2283			device_printf(sc->dev, "failed to allocate mbuf ring\n");
2284			goto err;
2285		}
2286		if ((q->txq[i].txq_ifq =
2287			malloc(sizeof(struct ifaltq), M_DEVBUF, M_NOWAIT|M_ZERO))
2288		    == NULL) {
2289			device_printf(sc->dev, "failed to allocate ifq\n");
2290			goto err;
2291		}
2292		ifq_init(q->txq[i].txq_ifq, pi->ifp);
2293	}
2294	init_qset_cntxt(q, id);
2295	q->idx = id;
2296	if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2297		    sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2298		    &q->fl[0].desc, &q->fl[0].sdesc,
2299		    &q->fl[0].desc_tag, &q->fl[0].desc_map,
2300		    sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2301		printf("error %d from alloc ring fl0\n", ret);
2302		goto err;
2303	}
2304
2305	if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2306		    sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2307		    &q->fl[1].desc, &q->fl[1].sdesc,
2308		    &q->fl[1].desc_tag, &q->fl[1].desc_map,
2309		    sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2310		printf("error %d from alloc ring fl1\n", ret);
2311		goto err;
2312	}
2313
2314	if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2315		    &q->rspq.phys_addr, &q->rspq.desc, NULL,
2316		    &q->rspq.desc_tag, &q->rspq.desc_map,
2317		    NULL, NULL)) != 0) {
2318		printf("error %d from alloc ring rspq\n", ret);
2319		goto err;
2320	}
2321
2322	for (i = 0; i < ntxq; ++i) {
2323		/*
2324		 * The control queue always uses immediate data, so it does not
2325		 * need to keep track of any mbufs.
2326		 * XXX Placeholder for future TOE support.
2327		 */
2328		size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2329
2330		if ((ret = alloc_ring(sc, p->txq_size[i],
2331			    sizeof(struct tx_desc), sz,
2332			    &q->txq[i].phys_addr, &q->txq[i].desc,
2333			    &q->txq[i].sdesc, &q->txq[i].desc_tag,
2334			    &q->txq[i].desc_map,
2335			    sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2336			printf("error %d from alloc ring tx %i\n", ret, i);
2337			goto err;
2338		}
2339		mbufq_init(&q->txq[i].sendq);
2340		q->txq[i].gen = 1;
2341		q->txq[i].size = p->txq_size[i];
2342		snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d",
2343		    device_get_unit(sc->dev), irq_vec_idx, i);
2344		MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF);
2345	}
2346
2347	q->txq[TXQ_ETH].port = pi;
2348
2349	TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2350	TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2351	TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]);
2352	TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]);
2353
2354	q->fl[0].gen = q->fl[1].gen = 1;
2355	q->fl[0].size = p->fl_size;
2356	q->fl[1].size = p->jumbo_size;
2357
2358	q->rspq.gen = 1;
2359	q->rspq.cidx = 0;
2360	q->rspq.size = p->rspq_size;
2361
2362
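	/*
	 * Each receive cluster doubles as its own mbuf: the first header_size
	 * bytes hold the mbuf header, packet header, external-storage record
	 * and a trailing reference count (see init_cluster_mbuf() below), so
	 * the usable buffer size is the cluster size minus that overhead.
	 */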
2363	header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
2364	q->txq[TXQ_ETH].stop_thres = nports *
2365	    flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2366
2367	q->fl[0].buf_size = (MCLBYTES - header_size);
2368	q->fl[0].zone = zone_clust;
2369	q->fl[0].type = EXT_CLUSTER;
2370#if __FreeBSD_version > 800000
2371	if (cxgb_use_16k_clusters) {
2372		q->fl[1].buf_size = MJUM16BYTES - header_size;
2373		q->fl[1].zone = zone_jumbo16;
2374		q->fl[1].type = EXT_JUMBO16;
2375	} else {
2376		q->fl[1].buf_size = MJUM9BYTES - header_size;
2377		q->fl[1].zone = zone_jumbo9;
2378		q->fl[1].type = EXT_JUMBO9;
2379	}
2380#else
2381	q->fl[1].buf_size = MJUMPAGESIZE - header_size;
2382	q->fl[1].zone = zone_jumbop;
2383	q->fl[1].type = EXT_JUMBOP;
2384#endif
2385
2386#ifdef LRO_SUPPORTED
2387	/* Allocate and setup the lro_ctrl structure */
2388	q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
2389	ret = tcp_lro_init(&q->lro.ctrl);
2390	if (ret) {
2391		printf("error %d from tcp_lro_init\n", ret);
2392		goto err;
2393	}
2394	q->lro.ctrl.ifp = pi->ifp;
2395#endif
2396
2397	mtx_lock_spin(&sc->sge.reg_lock);
2398	ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2399				   q->rspq.phys_addr, q->rspq.size,
2400				   q->fl[0].buf_size, 1, 0);
2401	if (ret) {
2402		printf("error %d from t3_sge_init_rspcntxt\n", ret);
2403		goto err_unlock;
2404	}
2405
2406	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2407		ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2408					  q->fl[i].phys_addr, q->fl[i].size,
2409					  q->fl[i].buf_size, p->cong_thres, 1,
2410					  0);
2411		if (ret) {
2412			printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2413			goto err_unlock;
2414		}
2415	}
2416
2417	ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2418				 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2419				 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2420				 1, 0);
2421	if (ret) {
2422		printf("error %d from t3_sge_init_ecntxt\n", ret);
2423		goto err_unlock;
2424	}
2425
2426	if (ntxq > 1) {
2427		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2428					 USE_GTS, SGE_CNTXT_OFLD, id,
2429					 q->txq[TXQ_OFLD].phys_addr,
2430					 q->txq[TXQ_OFLD].size, 0, 1, 0);
2431		if (ret) {
2432			printf("error %d from t3_sge_init_ecntxt\n", ret);
2433			goto err_unlock;
2434		}
2435	}
2436
2437	if (ntxq > 2) {
2438		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2439					 SGE_CNTXT_CTRL, id,
2440					 q->txq[TXQ_CTRL].phys_addr,
2441					 q->txq[TXQ_CTRL].size,
2442					 q->txq[TXQ_CTRL].token, 1, 0);
2443		if (ret) {
2444			printf("error %d from t3_sge_init_ecntxt\n", ret);
2445			goto err_unlock;
2446		}
2447	}
2448
2449	snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2450	    device_get_unit(sc->dev), irq_vec_idx);
2451	MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2452
2453	mtx_unlock_spin(&sc->sge.reg_lock);
2454	t3_update_qset_coalesce(q, p);
2455	q->port = pi;
2456
2457	refill_fl(sc, &q->fl[0], q->fl[0].size);
2458	refill_fl(sc, &q->fl[1], q->fl[1].size);
2459	refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2460
2461	t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2462		     V_NEWTIMER(q->rspq.holdoff_tmr));
2463
2464	return (0);
2465
2466err_unlock:
2467	mtx_unlock_spin(&sc->sge.reg_lock);
2468err:
2469	t3_free_qset(sc, q);
2470
2471	return (ret);
2472}
2473
2474/*
2475 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
2476 * ethernet data.  Hardware assistance with various checksums and any vlan tag
2477 * will also be taken into account here.
2478 */
2479void
2480t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
2481{
2482	struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2483	struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2484	struct ifnet *ifp = pi->ifp;
2485
2486	DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
2487
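	/*
	 * When the CPL header indicates a valid hardware checksum
	 * (csum_valid set and csum == 0xffff), the mbuf is flagged so the
	 * stack skips both the IP header check and the TCP/UDP check:
	 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR with csum_data = 0xffff covers
	 * the L4 checksum.
	 */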
2488	if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
2489	    cpl->csum_valid && cpl->csum == 0xffff) {
2490		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);
2491		rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2492		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
2493		m->m_pkthdr.csum_data = 0xffff;
2494	}
2495	/*
2496	 * XXX need to add VLAN support for 6.x
2497	 */
2498#ifdef VLAN_SUPPORTED
2499	if (__predict_false(cpl->vlan_valid)) {
2500		m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2501		m->m_flags |= M_VLANTAG;
2502	}
2503#endif
2504
2505	m->m_pkthdr.rcvif = ifp;
2506	m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
2507#ifndef DISABLE_MBUF_IOVEC
2508	m_explode(m);
2509#endif
2510	/*
2511	 * adjust after conversion to mbuf chain
2512	 */
2513	m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
2514	m->m_len -= (sizeof(*cpl) + ethpad);
2515	m->m_data += (sizeof(*cpl) + ethpad);
2516}
2517
2518static void
2519ext_free_handler(void *arg1, void * arg2)
2520{
2521	uintptr_t type = (uintptr_t)arg2;
2522	uma_zone_t zone;
2523	struct mbuf *m;
2524
2525	m = arg1;
2526	zone = m_getzonefromtype(type);
2527	m->m_ext.ext_type = (int)type;
2528	cxgb_ext_freed++;
2529	cxgb_cache_put(zone, m);
2530}
2531
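/*
 * Receive clusters double as mbufs.  A sketch of the layout that
 * init_cluster_mbuf() builds at the front of each cluster (descriptive only):
 *
 *	cl + 0:					mbuf header, packet header, m_ext_
 *	cl + header_size - sizeof(uint32_t):	reference count
 *	cl + header_size:			start of packet data (m->m_data)
 *
 * ext_free_handler() is registered as the external free routine so the
 * cluster is returned to its UMA zone once the last reference is dropped.
 */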
2532static void
2533init_cluster_mbuf(caddr_t cl, int flags, int type, uma_zone_t zone)
2534{
2535	struct mbuf *m;
2536	int header_size;
2537
2538	header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) +
2539	    sizeof(struct m_ext_) + sizeof(uint32_t);
2540
2541	bzero(cl, header_size);
2542	m = (struct mbuf *)cl;
2543
2544	cxgb_ext_inited++;
2545	SLIST_INIT(&m->m_pkthdr.tags);
2546	m->m_type = MT_DATA;
2547	m->m_flags = flags | M_NOFREE | M_EXT;
2548	m->m_data = cl + header_size;
2549	m->m_ext.ext_buf = cl;
2550	m->m_ext.ref_cnt = (uint32_t *)(cl + header_size - sizeof(uint32_t));
2551	m->m_ext.ext_size = m_getsizefromtype(type);
2552	m->m_ext.ext_free = ext_free_handler;
2553#if __FreeBSD_version >= 800016
2554	m->m_ext.ext_arg1 = cl;
2555	m->m_ext.ext_arg2 = (void *)(uintptr_t)type;
2556#else
2557	m->m_ext.ext_args = (void *)(uintptr_t)type;
2558#endif
2559	m->m_ext.ext_type = EXT_EXTREF;
2560	*(m->m_ext.ref_cnt) = 1;
2561	DPRINTF("data=%p ref_cnt=%p\n", m->m_data, m->m_ext.ref_cnt);
2562}
2563
2564
2565/**
2566 *	get_packet - return the next ingress packet buffer from a free list
2567 *	@adap: the adapter that received the packet
2568 *	@drop_thres: # of remaining buffers before we start dropping packets
2569 *	@qs: the qset that the SGE free list holding the packet belongs to
2570 *      @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
2571 *      @r: response descriptor
2572 *
2573 *	Get the next packet from a free list and complete setup of the
2574 *	sk_buff.  If the packet is small we make a copy and recycle the
2575 *	mbuf.  If the packet is small we make a copy and recycle the
2576 *	positive drop threshold is supplied packets are dropped and their
2577 *	buffers recycled if (a) the number of remaining buffers is under the
2578 *	threshold and the packet is too big to copy, or (b) the packet should
2579 *	be copied but there is no memory for the copy.
2580 */
2581#ifdef DISABLE_MBUF_IOVEC
2582
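/*
 * Chaining note for the switch below (descriptive only): the SGE may split a
 * large frame across several free-list buffers and marks each response with
 * SOP/EOP flags.  RSPQ_SOP starts a new chain in mh->mh_head, RSPQ_NSOP_NEOP
 * and RSPQ_EOP append to mh->mh_tail, and only a descriptor carrying EOP
 * makes get_packet() return 1 so the caller knows the packet is complete and
 * can be passed up the stack.
 */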
2583static int
2584get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2585    struct t3_mbuf_hdr *mh, struct rsp_desc *r)
2586{
2587
2588	unsigned int len_cq =  ntohl(r->len_cq);
2589	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2590	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2591	uint32_t len = G_RSPD_LEN(len_cq);
2592	uint32_t flags = ntohl(r->flags);
2593	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2594	caddr_t cl;
2595	struct mbuf *m, *m0;
2596	int ret = 0;
2597
2598	prefetch(sd->rxsd_cl);
2599
2600	fl->credits--;
2601	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2602
2603	if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2604		if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2605			goto skip_recycle;
2606		cl = mtod(m0, void *);
2607		memcpy(cl, sd->data, len);
2608		recycle_rx_buf(adap, fl, fl->cidx);
2609		m = m0;
2610		m0->m_len = len;
2611	} else {
2612	skip_recycle:
2613
2614		bus_dmamap_unload(fl->entry_tag, sd->map);
2615		cl = sd->rxsd_cl;
2616		m = m0 = (struct mbuf *)cl;
2617
2618		if ((sopeop == RSPQ_SOP_EOP) ||
2619		    (sopeop == RSPQ_SOP))
2620			flags = M_PKTHDR;
2621		init_cluster_mbuf(cl, flags, fl->type, fl->zone);
2622		m0->m_len = len;
2623	}
2624	switch(sopeop) {
2625	case RSPQ_SOP_EOP:
2626		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2627		mh->mh_head = mh->mh_tail = m;
2628		m->m_pkthdr.len = len;
2629		ret = 1;
2630		break;
2631	case RSPQ_NSOP_NEOP:
2632		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2633		if (mh->mh_tail == NULL) {
2634			log(LOG_ERR, "discarding intermediate descriptor entry\n");
2635			m_freem(m);
2636			break;
2637		}
2638		mh->mh_tail->m_next = m;
2639		mh->mh_tail = m;
2640		mh->mh_head->m_pkthdr.len += len;
2641		ret = 0;
2642		break;
2643	case RSPQ_SOP:
2644		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2645		m->m_pkthdr.len = len;
2646		mh->mh_head = mh->mh_tail = m;
2647		ret = 0;
2648		break;
2649	case RSPQ_EOP:
2650		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2651		mh->mh_head->m_pkthdr.len += len;
2652		mh->mh_tail->m_next = m;
2653		mh->mh_tail = m;
2654		ret = 1;
2655		break;
2656	}
2657	if (++fl->cidx == fl->size)
2658		fl->cidx = 0;
2659
2660	return (ret);
2661}
2662
2663#else
2664
2665static int
2666get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2667    struct mbuf **m, struct rsp_desc *r)
2668{
2669
2670	unsigned int len_cq =  ntohl(r->len_cq);
2671	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2672	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2673	uint32_t len = G_RSPD_LEN(len_cq);
2674	uint32_t flags = ntohl(r->flags);
2675	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2676	void *cl;
2677	int ret = 0;
2678	struct mbuf *m0;
2679#if 0
2680	if ((sd + 1 )->rxsd_cl)
2681		prefetch((sd + 1)->rxsd_cl);
2682	if ((sd + 2)->rxsd_cl)
2683		prefetch((sd + 2)->rxsd_cl);
2684#endif
2685	DPRINTF("rx cpu=%d\n", curcpu);
2686	fl->credits--;
2687	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2688
2689	if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2690		if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2691			goto skip_recycle;
2692		cl = mtod(m0, void *);
2693		memcpy(cl, sd->data, len);
2694		recycle_rx_buf(adap, fl, fl->cidx);
2695		*m = m0;
2696	} else {
2697	skip_recycle:
2698		bus_dmamap_unload(fl->entry_tag, sd->map);
2699		cl = sd->rxsd_cl;
2700		*m = m0 = (struct mbuf *)cl;
2701	}
2702
2703	switch(sopeop) {
2704	case RSPQ_SOP_EOP:
2705		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2706		if (cl == sd->rxsd_cl)
2707			init_cluster_mbuf(cl, M_PKTHDR, fl->type, fl->zone);
2708		m0->m_len = m0->m_pkthdr.len = len;
2709		ret = 1;
2710		goto done;
2711		break;
2712	case RSPQ_NSOP_NEOP:
2713		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2714		panic("chaining unsupported");
2715		ret = 0;
2716		break;
2717	case RSPQ_SOP:
2718		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2719		panic("chaining unsupported");
2720		m_iovinit(m0);
2721		ret = 0;
2722		break;
2723	case RSPQ_EOP:
2724		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2725		panic("chaining unsupported");
2726		ret = 1;
2727		break;
2728	}
2729	panic("append not supported");
2730#if 0
2731	m_iovappend(m0, cl, fl->buf_size, len, sizeof(uint32_t), sd->rxsd_ref);
2732#endif
2733done:
2734	if (++fl->cidx == fl->size)
2735		fl->cidx = 0;
2736
2737	return (ret);
2738}
2739#endif
2740/**
2741 *	handle_rsp_cntrl_info - handles control information in a response
2742 *	@qs: the queue set corresponding to the response
2743 *	@flags: the response control flags
2744 *
2745 *	Handles the control information of an SGE response, such as GTS
2746 *	indications and completion credits for the queue set's Tx queues.
2747 *	HW coalesces credits, we don't do any extra SW coalescing.
2748 */
2749static __inline void
2750handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2751{
2752	unsigned int credits;
2753
2754#if USE_GTS
2755	if (flags & F_RSPD_TXQ0_GTS)
2756		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2757#endif
2758	credits = G_RSPD_TXQ0_CR(flags);
2759	if (credits)
2760		qs->txq[TXQ_ETH].processed += credits;
2761
2762	credits = G_RSPD_TXQ2_CR(flags);
2763	if (credits)
2764		qs->txq[TXQ_CTRL].processed += credits;
2765
2766# if USE_GTS
2767	if (flags & F_RSPD_TXQ1_GTS)
2768		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2769# endif
2770	credits = G_RSPD_TXQ1_CR(flags);
2771	if (credits)
2772		qs->txq[TXQ_OFLD].processed += credits;
2773
2774}
2775
2776static void
2777check_ring_db(adapter_t *adap, struct sge_qset *qs,
2778    unsigned int sleeping)
2779{
2780	;
2781}
2782
2783/**
2784 *	process_responses - process responses from an SGE response queue
2785 *	@adap: the adapter
2786 *	@qs: the queue set to which the response queue belongs
2787 *	@budget: how many responses can be processed in this round
2788 *
2789 *	Process responses from an SGE response queue up to the supplied budget.
2790 *	Responses include received packets as well as credits and other events
2791 *	for the queues that belong to the response queue's queue set.
2792 *	A negative budget is effectively unlimited.
2793 *
2794 *	Additionally choose the interrupt holdoff time for the next interrupt
2795 *	on this queue.  If the system is under memory shortage use a fairly
2796 *	long delay to help recovery.
2797 */
2798int
2799process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2800{
2801	struct sge_rspq *rspq = &qs->rspq;
2802	struct rsp_desc *r = &rspq->desc[rspq->cidx];
2803	int budget_left = budget;
2804	unsigned int sleeping = 0;
2805#ifdef LRO_SUPPORTED
2806	int lro_enabled = qs->lro.enabled;
2807	int skip_lro;
2808	struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
2809#endif
2810	struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
2811	int ngathered = 0;
2812#ifdef DEBUG
2813	static int last_holdoff = 0;
2814	if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2815		printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2816		last_holdoff = rspq->holdoff_tmr;
2817	}
2818#endif
2819	rspq->next_holdoff = rspq->holdoff_tmr;
2820
2821	while (__predict_true(budget_left && is_new_response(r, rspq))) {
2822		int eth, eop = 0, ethpad = 0;
2823		uint32_t flags = ntohl(r->flags);
2824		uint32_t rss_csum = *(const uint32_t *)r;
2825		uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
2826
2827		eth = (r->rss_hdr.opcode == CPL_RX_PKT);
2828
2829		if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2830			struct mbuf *m;
2831
2832			if (cxgb_debug)
2833				printf("async notification\n");
2834
2835			if (rspq->rspq_mh.mh_head == NULL) {
2836				rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
2837				m = rspq->rspq_mh.mh_head;
2838			} else {
2839				m = m_gethdr(M_DONTWAIT, MT_DATA);
2840			}
2841
2842			/* XXX m is lost here if rspq->rspq_mbuf is not NULL */
2843
2844			if (m == NULL)
2845				goto no_mem;
2846
2847			memcpy(mtod(m, char *), r, AN_PKT_SIZE);
2848			m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
2849			*mtod(m, char *) = CPL_ASYNC_NOTIF;
2850			rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
2851			eop = 1;
2852			rspq->async_notif++;
2853			goto skip;
2854		} else if  (flags & F_RSPD_IMM_DATA_VALID) {
2855			struct mbuf *m = NULL;
2856
2857			DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
2858			    r->rss_hdr.opcode, rspq->cidx);
2859			if (rspq->rspq_mh.mh_head == NULL)
2860				rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
2861			else
2862				m = m_gethdr(M_DONTWAIT, MT_DATA);
2863
2864			if (rspq->rspq_mh.mh_head == NULL &&  m == NULL) {
2865		no_mem:
2866				rspq->next_holdoff = NOMEM_INTR_DELAY;
2867				budget_left--;
2868				break;
2869			}
2870			get_imm_packet(adap, r, rspq->rspq_mh.mh_head);
2871			eop = 1;
2872			rspq->imm_data++;
2873		} else if (r->len_cq) {
2874			int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
2875
2876#ifdef DISABLE_MBUF_IOVEC
2877			eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r);
2878#else
2879			eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mbuf, r);
2880#endif
2881#ifdef IFNET_MULTIQUEUE
2882			rspq->rspq_mh.mh_head->m_flags |= M_FLOWID;
2883			rspq->rspq_mh.mh_head->m_pkthdr.flowid = rss_hash;
2884#endif
2885			ethpad = 2;
2886		} else {
2887			DPRINTF("pure response\n");
2888			rspq->pure_rsps++;
2889		}
2890	skip:
2891		if (flags & RSPD_CTRL_MASK) {
2892			sleeping |= flags & RSPD_GTS_MASK;
2893			handle_rsp_cntrl_info(qs, flags);
2894		}
2895
2896		r++;
2897		if (__predict_false(++rspq->cidx == rspq->size)) {
2898			rspq->cidx = 0;
2899			rspq->gen ^= 1;
2900			r = rspq->desc;
2901		}
2902		prefetch(r);
2903		if (++rspq->credits >= (rspq->size / 4)) {
2904			refill_rspq(adap, rspq, rspq->credits);
2905			rspq->credits = 0;
2906		}
2907		DPRINTF("eth=%d eop=%d flags=0x%x\n", eth, eop, flags);
2908
2909		if (!eth && eop) {
2910			rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
2911			/*
2912			 * XXX size mismatch
2913			 */
2914			m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
2915
2916
2917			ngathered = rx_offload(&adap->tdev, rspq,
2918			    rspq->rspq_mh.mh_head, offload_mbufs, ngathered);
2919			rspq->rspq_mh.mh_head = NULL;
2920			DPRINTF("received offload packet\n");
2921
2922		} else if (eth && eop) {
2923			struct mbuf *m = rspq->rspq_mh.mh_head;
2924			prefetch(mtod(m, uint8_t *));
2925			prefetch(mtod(m, uint8_t *) + L1_CACHE_BYTES);
2926
2927			t3_rx_eth(adap, rspq, m, ethpad);
2928
2929#ifdef LRO_SUPPORTED
2930			/*
2931			 * The T304 sends incoming packets on any qset.  If LRO
2932			 * is also enabled, we could end up sending the packet up
2933			 * lro_ctrl->ifp's input.  That is incorrect.
2934			 *
2935			 * The mbuf's rcvif was derived from the cpl header and
2936			 * is accurate.  Skip LRO and just use that.
2937			 */
2938			skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
2939
2940			if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro &&
2941			    (tcp_lro_rx(lro_ctrl, m, 0) == 0)) {
2942				/* successfully queued for LRO */
2943			} else
2944#endif
2945			{
2946				/*
2947				 * LRO not enabled, packet unsuitable for LRO,
2948				 * or unable to queue.  Pass it up right now in
2949				 * either case.
2950				 */
2951				struct ifnet *ifp = m->m_pkthdr.rcvif;
2952				(*ifp->if_input)(ifp, m);
2953			}
2954			DPRINTF("received tunnel packet\n");
2955			rspq->rspq_mh.mh_head = NULL;
2956
2957		}
2958		__refill_fl_lt(adap, &qs->fl[0], 32);
2959		__refill_fl_lt(adap, &qs->fl[1], 32);
2960		--budget_left;
2961	}
2962
2963	deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
2964
2965#ifdef LRO_SUPPORTED
2966	/* Flush LRO */
2967	while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
2968		struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
2969		SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next);
2970		tcp_lro_flush(lro_ctrl, queued);
2971	}
2972#endif
2973
2974	if (sleeping)
2975		check_ring_db(adap, qs, sleeping);
2976
2977	smp_mb();  /* commit Tx queue processed updates */
2978	if (__predict_false(qs->txq_stopped > 1)) {
2979		printf("restarting tx on %p\n", qs);
2980
2981		restart_tx(qs);
2982	}
2983
2984	__refill_fl_lt(adap, &qs->fl[0], 512);
2985	__refill_fl_lt(adap, &qs->fl[1], 512);
2986	budget -= budget_left;
2987	return (budget);
2988}
2989
2990/*
2991 * A helper function that processes responses and issues GTS.
2992 */
2993static __inline int
2994process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
2995{
2996	int work;
2997	static int last_holdoff = 0;
2998
2999	work = process_responses(adap, rspq_to_qset(rq), -1);
3000
3001	if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
3002		printf("next_holdoff=%d\n", rq->next_holdoff);
3003		last_holdoff = rq->next_holdoff;
3004	}
3005	t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
3006	    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
3007
3008	return (work);
3009}
3010
3011
3012/*
3013 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
3014 * Handles data events from SGE response queues as well as error and other
3015 * async events as they all use the same interrupt pin.  We use one SGE
3016 * response queue per port in this mode and protect all response queues with
3017 * queue 0's lock.
3018 */
3019void
3020t3b_intr(void *data)
3021{
3022	uint32_t i, map;
3023	adapter_t *adap = data;
3024	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3025
3026	t3_write_reg(adap, A_PL_CLI, 0);
3027	map = t3_read_reg(adap, A_SG_DATA_INTR);
3028
3029	if (!map)
3030		return;
3031
3032	if (__predict_false(map & F_ERRINTR))
3033		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3034
3035	mtx_lock(&q0->lock);
3036	for_each_port(adap, i)
3037	    if (map & (1 << i))
3038			process_responses_gts(adap, &adap->sge.qs[i].rspq);
3039	mtx_unlock(&q0->lock);
3040}
3041
3042/*
3043 * The MSI interrupt handler.  This needs to handle data events from SGE
3044 * response queues as well as error and other async events as they all use
3045 * the same MSI vector.  We use one SGE response queue per port in this mode
3046 * and protect all response queues with queue 0's lock.
3047 */
3048void
3049t3_intr_msi(void *data)
3050{
3051	adapter_t *adap = data;
3052	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3053	int i, new_packets = 0;
3054
3055	mtx_lock(&q0->lock);
3056
3057	for_each_port(adap, i)
3058	    if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
3059		    new_packets = 1;
3060	mtx_unlock(&q0->lock);
3061	if (new_packets == 0)
3062		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3063}
3064
3065void
3066t3_intr_msix(void *data)
3067{
3068	struct sge_qset *qs = data;
3069	adapter_t *adap = qs->port->adapter;
3070	struct sge_rspq *rspq = &qs->rspq;
3071#ifndef IFNET_MULTIQUEUE
3072	mtx_lock(&rspq->lock);
3073#else
3074	if (mtx_trylock(&rspq->lock))
3075#endif
3076	{
3077
3078		if (process_responses_gts(adap, rspq) == 0)
3079			rspq->unhandled_irqs++;
3080		mtx_unlock(&rspq->lock);
3081	}
3082}
3083
3084#define QDUMP_SBUF_SIZE		(32 * 400)
3085static int
3086t3_dump_rspq(SYSCTL_HANDLER_ARGS)
3087{
3088	struct sge_rspq *rspq;
3089	struct sge_qset *qs;
3090	int i, err, dump_end, idx;
3091	static int multiplier = 1;
3092	struct sbuf *sb;
3093	struct rsp_desc *rspd;
3094	uint32_t data[4];
3095
3096	rspq = arg1;
3097	qs = rspq_to_qset(rspq);
3098	if (rspq->rspq_dump_count == 0)
3099		return (0);
3100	if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
3101		log(LOG_WARNING,
3102		    "dump count is too large %d\n", rspq->rspq_dump_count);
3103		rspq->rspq_dump_count = 0;
3104		return (EINVAL);
3105	}
3106	if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
3107		log(LOG_WARNING,
3108		    "dump start of %d is greater than queue size\n",
3109		    rspq->rspq_dump_start);
3110		rspq->rspq_dump_start = 0;
3111		return (EINVAL);
3112	}
3113	err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
3114	if (err)
3115		return (err);
3116retry_sbufops:
3117	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3118
3119	sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
3120	    (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
3121	    ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
3122	sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
3123	    ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
3124
3125	sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
3126	    (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
3127
3128	dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
3129	for (i = rspq->rspq_dump_start; i < dump_end; i++) {
3130		idx = i & (RSPQ_Q_SIZE-1);
3131
3132		rspd = &rspq->desc[idx];
3133		sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
3134		    idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
3135		    rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
3136		sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
3137		    rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
3138		    be32toh(rspd->len_cq), rspd->intr_gen);
3139	}
3140	if (sbuf_overflowed(sb)) {
3141		sbuf_delete(sb);
3142		multiplier++;
3143		goto retry_sbufops;
3144	}
3145	sbuf_finish(sb);
3146	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3147	sbuf_delete(sb);
3148	return (err);
3149}
3150
3151static int
3152t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
3153{
3154	struct sge_txq *txq;
3155	struct sge_qset *qs;
3156	int i, j, err, dump_end;
3157	static int multiplier = 1;
3158	struct sbuf *sb;
3159	struct tx_desc *txd;
3160	uint32_t *WR, wr_hi, wr_lo, gen;
3161	uint32_t data[4];
3162
3163	txq = arg1;
3164	qs = txq_to_qset(txq, TXQ_ETH);
3165	if (txq->txq_dump_count == 0) {
3166		return (0);
3167	}
3168	if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
3169		log(LOG_WARNING,
3170		    "dump count is too large %d\n", txq->txq_dump_count);
3171		txq->txq_dump_count = 1;
3172		return (EINVAL);
3173	}
3174	if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
3175		log(LOG_WARNING,
3176		    "dump start of %d is greater than queue size\n",
3177		    txq->txq_dump_start);
3178		txq->txq_dump_start = 0;
3179		return (EINVAL);
3180	}
3181	err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
3182	if (err)
3183		return (err);
3184
3185
3186retry_sbufops:
3187	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3188
3189	sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
3190	    (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
3191	    (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
3192	sbuf_printf(sb, " TUN=%u TOE=%u generation=%u uP token=%u valid=%u\n",
3193	    ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
3194	    ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
3195	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3196	    txq->txq_dump_start,
3197	    (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
3198
3199	dump_end = txq->txq_dump_start + txq->txq_dump_count;
3200	for (i = txq->txq_dump_start; i < dump_end; i++) {
3201		txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
3202		WR = (uint32_t *)txd->flit;
3203		wr_hi = ntohl(WR[0]);
3204		wr_lo = ntohl(WR[1]);
3205		gen = G_WR_GEN(wr_lo);
3206
3207		sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3208		    wr_hi, wr_lo, gen);
3209		for (j = 2; j < 30; j += 4)
3210			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3211			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3212
3213	}
3214	if (sbuf_overflowed(sb)) {
3215		sbuf_delete(sb);
3216		multiplier++;
3217		goto retry_sbufops;
3218	}
3219	sbuf_finish(sb);
3220	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3221	sbuf_delete(sb);
3222	return (err);
3223}
3224
3225static int
3226t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
3227{
3228	struct sge_txq *txq;
3229	struct sge_qset *qs;
3230	int i, j, err, dump_end;
3231	static int multiplier = 1;
3232	struct sbuf *sb;
3233	struct tx_desc *txd;
3234	uint32_t *WR, wr_hi, wr_lo, gen;
3235
3236	txq = arg1;
3237	qs = txq_to_qset(txq, TXQ_CTRL);
3238	if (txq->txq_dump_count == 0) {
3239		return (0);
3240	}
3241	if (txq->txq_dump_count > 256) {
3242		log(LOG_WARNING,
3243		    "dump count is too large %d\n", txq->txq_dump_count);
3244		txq->txq_dump_count = 1;
3245		return (EINVAL);
3246	}
3247	if (txq->txq_dump_start > 255) {
3248		log(LOG_WARNING,
3249		    "dump start of %d is greater than queue size\n",
3250		    txq->txq_dump_start);
3251		txq->txq_dump_start = 0;
3252		return (EINVAL);
3253	}
3254
3255retry_sbufops:
3256	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3257	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3258	    txq->txq_dump_start,
3259	    (txq->txq_dump_start + txq->txq_dump_count) & 255);
3260
3261	dump_end = txq->txq_dump_start + txq->txq_dump_count;
3262	for (i = txq->txq_dump_start; i < dump_end; i++) {
3263		txd = &txq->desc[i & (255)];
3264		WR = (uint32_t *)txd->flit;
3265		wr_hi = ntohl(WR[0]);
3266		wr_lo = ntohl(WR[1]);
3267		gen = G_WR_GEN(wr_lo);
3268
3269		sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3270		    wr_hi, wr_lo, gen);
3271		for (j = 2; j < 30; j += 4)
3272			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3273			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3274
3275	}
3276	if (sbuf_overflowed(sb)) {
3277		sbuf_delete(sb);
3278		multiplier++;
3279		goto retry_sbufops;
3280	}
3281	sbuf_finish(sb);
3282	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3283	sbuf_delete(sb);
3284	return (err);
3285}
3286
3287static int
3288t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
3289{
3290	adapter_t *sc = arg1;
3291	struct qset_params *qsp = &sc->params.sge.qset[0];
3292	int coalesce_usecs;
3293	struct sge_qset *qs;
3294	int i, j, err, nqsets = 0;
3295	struct mtx *lock;
3296
3297	if ((sc->flags & FULL_INIT_DONE) == 0)
3298		return (ENXIO);
3299
3300	coalesce_usecs = qsp->coalesce_usecs;
3301	err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
3302
3303	if (err != 0) {
3304		return (err);
3305	}
3306	if (coalesce_usecs == qsp->coalesce_usecs)
3307		return (0);
3308
3309	for (i = 0; i < sc->params.nports; i++)
3310		for (j = 0; j < sc->port[i].nqsets; j++)
3311			nqsets++;
3312
3313	coalesce_usecs = max(1, coalesce_usecs);
3314
3315	for (i = 0; i < nqsets; i++) {
3316		qs = &sc->sge.qs[i];
3317		qsp = &sc->params.sge.qset[i];
3318		qsp->coalesce_usecs = coalesce_usecs;
3319
3320		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
3321			    &sc->sge.qs[0].rspq.lock;
3322
3323		mtx_lock(lock);
3324		t3_update_qset_coalesce(qs, qsp);
3325		t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
3326		    V_NEWTIMER(qs->rspq.holdoff_tmr));
3327		mtx_unlock(lock);
3328	}
3329
3330	return (0);
3331}
3332
3333
3334void
3335t3_add_attach_sysctls(adapter_t *sc)
3336{
3337	struct sysctl_ctx_list *ctx;
3338	struct sysctl_oid_list *children;
3339
3340	ctx = device_get_sysctl_ctx(sc->dev);
3341	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3342
3343	/* random information */
3344	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3345	    "firmware_version",
3346	    CTLFLAG_RD, &sc->fw_version,
3347	    0, "firmware version");
3348	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3349	    "hw_revision",
3350	    CTLFLAG_RD, &sc->params.rev,
3351	    0, "chip hardware revision");
3352	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3353	    "port_types",
3354	    CTLFLAG_RD, &sc->port_types,
3355	    0, "type of ports");
3356	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3357	    "enable_debug",
3358	    CTLFLAG_RW, &cxgb_debug,
3359	    0, "enable verbose debugging output");
3360	SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tunq_coalesce",
3361	    CTLFLAG_RD, &sc->tunq_coalesce,
3362	    "#tunneled packets freed");
3363	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3364	    "txq_overrun",
3365	    CTLFLAG_RD, &txq_fills,
3366	    0, "#times txq overrun");
3367	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3368	    "pcpu_cache_enable",
3369	    CTLFLAG_RW, &cxgb_pcpu_cache_enable,
3370	    0, "enable driver local pcpu caches");
3371	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3372	    "multiq_tx_enable",
3373	    CTLFLAG_RW, &multiq_tx_enable,
3374	    0, "enable transmit by multiple tx queues");
3375	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3376	    "coalesce_tx_enable",
3377	    CTLFLAG_RW, &coalesce_tx_enable,
3378	    0, "coalesce small packets in work requests - WARNING ALPHA");
3379	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3380	    "wakeup_tx_thread",
3381	    CTLFLAG_RW, &wakeup_tx_thread,
3382	    0, "wakeup tx thread if no transmitter running");
3383	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3384	    "cache_alloc",
3385	    CTLFLAG_RD, &cxgb_cached_allocations,
3386	    0, "#times a cluster was allocated from cache");
3387	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3388	    "cached",
3389	    CTLFLAG_RD, &cxgb_cached,
3390	    0, "#times a cluster was cached");
3391	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3392	    "ext_freed",
3393	    CTLFLAG_RD, &cxgb_ext_freed,
3394	    0, "#times a cluster was freed through ext_free");
3395	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3396	    "ext_inited",
3397	    CTLFLAG_RD, &cxgb_ext_inited,
3398	    0, "#times a cluster was initialized for ext_free");
3399	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3400	    "mbufs_outstanding",
3401	    CTLFLAG_RD, &cxgb_mbufs_outstanding,
3402	    0, "#mbufs in flight in the driver");
3403	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3404	    "pack_outstanding",
3405	    CTLFLAG_RD, &cxgb_pack_outstanding,
3406	    0, "#packets in flight in the driver");
3407}
3408
3409
3410static const char *rspq_name = "rspq";
3411static const char *txq_names[] =
3412{
3413	"txq_eth",
3414	"txq_ofld",
3415	"txq_ctrl"
3416};
3417
3418static int
3419sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
3420{
3421	struct port_info *p = arg1;
3422	uint64_t *parg;
3423
3424	if (!p)
3425		return (EINVAL);
3426
3427	parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
3428
3429	PORT_LOCK(p);
3430	t3_mac_update_stats(&p->mac);
3431	PORT_UNLOCK(p);
3432
3433	return (sysctl_handle_quad(oidp, parg, 0, req));
3434}
3435
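/*
 * Registration sketch (illustrative; the real registrations are generated by
 * the CXGB_SYSCTL_ADD_QUAD macro in t3_add_configured_sysctls() below): each
 * 64-bit MAC counter is exported by passing its byte offset within struct
 * mac_stats as the sysctl arg2, which sysctl_handle_macstat() above adds to
 * &p->mac.stats after refreshing the counters from hardware.
 */
#if 0
	SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, "tx_octets",
	    CTLTYPE_QUAD | CTLFLAG_RD, pi,
	    offsetof(struct mac_stats, tx_octets),
	    sysctl_handle_macstat, "QU", 0);
#endif
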
3436void
3437t3_add_configured_sysctls(adapter_t *sc)
3438{
3439	struct sysctl_ctx_list *ctx;
3440	struct sysctl_oid_list *children;
3441	int i, j;
3442
3443	ctx = device_get_sysctl_ctx(sc->dev);
3444	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3445
3446	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3447	    "intr_coal",
3448	    CTLTYPE_INT|CTLFLAG_RW, sc,
3449	    0, t3_set_coalesce_usecs,
3450	    "I", "interrupt coalescing timer (us)");
3451
3452	for (i = 0; i < sc->params.nports; i++) {
3453		struct port_info *pi = &sc->port[i];
3454		struct sysctl_oid *poid;
3455		struct sysctl_oid_list *poidlist;
3456		struct mac_stats *mstats = &pi->mac.stats;
3457
3458		snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
3459		poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
3460		    pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
3461		poidlist = SYSCTL_CHILDREN(poid);
3462		SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO,
3463		    "nqsets", CTLFLAG_RD, &pi->nqsets,
3464		    0, "#queue sets");
3465
3466		for (j = 0; j < pi->nqsets; j++) {
3467			struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
3468			struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
3469					  *ctrlqpoid, *lropoid;
3470			struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
3471					       *txqpoidlist, *ctrlqpoidlist,
3472					       *lropoidlist;
3473			struct sge_txq *txq = &qs->txq[TXQ_ETH];
3474
3475			snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
3476
3477			qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
3478			    qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
3479			qspoidlist = SYSCTL_CHILDREN(qspoid);
3480
3481			SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
3482					CTLFLAG_RD, &qs->fl[0].empty, 0,
3483					"freelist #0 empty");
3484			SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
3485					CTLFLAG_RD, &qs->fl[1].empty, 0,
3486					"freelist #1 empty");
3487
3488			rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3489			    rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
3490			rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
3491
3492			txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3493			    txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
3494			txqpoidlist = SYSCTL_CHILDREN(txqpoid);
3495
3496			ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3497			    txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
3498			ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
3499
3500			lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3501			    "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
3502			lropoidlist = SYSCTL_CHILDREN(lropoid);
3503
3504			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
3505			    CTLFLAG_RD, &qs->rspq.size,
3506			    0, "#entries in response queue");
3507			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
3508			    CTLFLAG_RD, &qs->rspq.cidx,
3509			    0, "consumer index");
3510			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
3511			    CTLFLAG_RD, &qs->rspq.credits,
3512			    0, "#credits");
3513			SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
3514			    CTLFLAG_RD, &qs->rspq.phys_addr,
3515	    "physical address of the queue");
3516			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
3517			    CTLFLAG_RW, &qs->rspq.rspq_dump_start,
3518			    0, "start rspq dump entry");
3519			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
3520			    CTLFLAG_RW, &qs->rspq.rspq_dump_count,
3521			    0, "#rspq entries to dump");
3522			SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
3523			    CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
3524			    0, t3_dump_rspq, "A", "dump of the response queue");
3525
3526
3527			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped",
3528			    CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops,
3529			    0, "#tunneled packets dropped");
3530			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
3531			    CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
3532			    0, "#tunneled packets waiting to be sent");
3533#if 0
3534			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
3535			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
3536			    0, "#tunneled packets queue producer index");
3537			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
3538			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
3539			    0, "#tunneled packets queue consumer index");
3540#endif
3541			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
3542			    CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
3543			    0, "#tunneled packets processed by the card");
3544			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
3545			    CTLFLAG_RD, &txq->cleaned,
3546			    0, "#tunneled packets cleaned");
3547			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
3548			    CTLFLAG_RD, &txq->in_use,
3549			    0, "#tunneled packet slots in use");
3550			SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
3551			    CTLFLAG_RD, &txq->txq_frees,
3552			    "#tunneled packets freed");
3553			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
3554			    CTLFLAG_RD, &txq->txq_skipped,
3555			    0, "#tunneled packet descriptors skipped");
3556			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "coalesced",
3557			    CTLFLAG_RD, &txq->txq_coalesced,
3558			    0, "#tunneled packets coalesced");
3559			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
3560			    CTLFLAG_RD, &txq->txq_enqueued,
3561			    0, "#tunneled packets enqueued to hardware");
3562			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
3563			    CTLFLAG_RD, &qs->txq_stopped,
3564			    0, "tx queues stopped");
3565			SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
3566			    CTLFLAG_RD, &txq->phys_addr,
3567	    "physical address of the queue");
3568			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
3569			    CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
3570			    0, "txq generation");
3571			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
3572			    CTLFLAG_RD, &txq->cidx,
3573			    0, "hardware queue cidx");
3574			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
3575			    CTLFLAG_RD, &txq->pidx,
3576			    0, "hardware queue pidx");
3577			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
3578			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
3579			    0, "txq start idx for dump");
3580			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
3581			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
3582			    0, "txq #entries to dump");
3583			SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
3584			    CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
3585			    0, t3_dump_txq_eth, "A", "dump of the transmit queue");
3586
3587			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
3588			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
3589			    0, "ctrlq start idx for dump");
3590			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
3591			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
3592			    0, "ctrl #entries to dump");
3593			SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
3594			    CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
3595			    0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
3596
3597#ifdef LRO_SUPPORTED
3598			SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued",
3599			    CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
3600			SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed",
3601			    CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
3602			SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
3603			    CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
3604			SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
3605			    CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
3606#endif
3607		}
3608
3609		/* Now add a node for mac stats. */
3610		poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
3611		    CTLFLAG_RD, NULL, "MAC statistics");
3612		poidlist = SYSCTL_CHILDREN(poid);
3613
3614		/*
3615		 * We (ab)use the length argument (arg2) to pass on the offset
3616		 * of the data that we are interested in.  This is only required
3617		 * for the quad counters that are updated from the hardware (we
3618		 * make sure that we return the latest value).
3619		 * sysctl_handle_macstat first updates *all* the counters from
3620		 * the hardware, and then returns the latest value of the
3621		 * requested counter.  Best would be to update only the
3622		 * requested counter from hardware, but t3_mac_update_stats()
3623		 * hides all the register details and we don't want to dive into
3624		 * all that here.
3625		 */
3626#define CXGB_SYSCTL_ADD_QUAD(a)	SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
3627    (CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
3628    sysctl_handle_macstat, "QU", 0)
3629		CXGB_SYSCTL_ADD_QUAD(tx_octets);
3630		CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
3631		CXGB_SYSCTL_ADD_QUAD(tx_frames);
3632		CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
3633		CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
3634		CXGB_SYSCTL_ADD_QUAD(tx_pause);
3635		CXGB_SYSCTL_ADD_QUAD(tx_deferred);
3636		CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
3637		CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
3638		CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
3639		CXGB_SYSCTL_ADD_QUAD(tx_underrun);
3640		CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
3641		CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
3642		CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
3643		CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
3644		CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
3645		CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
3646		CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
3647		CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
3648		CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
3649		CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
3650		CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
3651		CXGB_SYSCTL_ADD_QUAD(rx_octets);
3652		CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
3653		CXGB_SYSCTL_ADD_QUAD(rx_frames);
3654		CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
3655		CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
3656		CXGB_SYSCTL_ADD_QUAD(rx_pause);
3657		CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
3658		CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
3659		CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
3660		CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
3661		CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
3662		CXGB_SYSCTL_ADD_QUAD(rx_runt);
3663		CXGB_SYSCTL_ADD_QUAD(rx_jabber);
3664		CXGB_SYSCTL_ADD_QUAD(rx_short);
3665		CXGB_SYSCTL_ADD_QUAD(rx_too_long);
3666		CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
3667		CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
3668		CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
3669		CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
3670		CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
3671		CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
3672		CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
3673		CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
3674		CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
3675#undef CXGB_SYSCTL_ADD_QUAD
3676
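		/*
		 * The remaining MAC counters are plain unsigned longs exported
		 * by reference; unlike the quad counters above, no handler
		 * refreshes them from hardware at read time.
		 */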
3677#define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
3678    CTLFLAG_RD, &mstats->a, 0)
3679		CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
3680		CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
3681		CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
3682		CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
3683		CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
3684		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
3685		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
3686		CXGB_SYSCTL_ADD_ULONG(num_toggled);
3687		CXGB_SYSCTL_ADD_ULONG(num_resets);
3688		CXGB_SYSCTL_ADD_ULONG(link_faults);
3689#undef CXGB_SYSCTL_ADD_ULONG
3690	}
3691}
3692
3693/**
3694 *	t3_get_desc - dump an SGE descriptor for debugging purposes
3695 *	@qs: the queue set
3696 *	@qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
3697 *	@idx: the descriptor index in the queue
3698 *	@data: where to dump the descriptor contents
3699 *
3700 *	Dumps the contents of a HW descriptor of an SGE queue.  Returns the
3701 *	size of the descriptor.
3702 */
3703int
3704t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3705		unsigned char *data)
3706{
3707	if (qnum >= 6)
3708		return (EINVAL);
3709
3710	if (qnum < 3) {
3711		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3712			return (EINVAL);
3713		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3714		return (sizeof(struct tx_desc));
3715	}
3716
3717	if (qnum == 3) {
3718		if (!qs->rspq.desc || idx >= qs->rspq.size)
3719			return (EINVAL);
3720		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3721		return (sizeof(struct rsp_desc));
3722	}
3723
3724	qnum -= 4;
3725	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3726		return (EINVAL);
3727	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3728	return (sizeof(struct rx_desc));
3729}
3730
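/*
 * Usage sketch, for illustration only: dump response descriptor 0 of the
 * first queue set.  "sc" stands in for the adapter softc and the sge.qs[]
 * layout is assumed here, as is reporting via log(9).
 *
 *	unsigned char buf[sizeof(struct rsp_desc)];
 *	int len;
 *
 *	len = t3_get_desc(&sc->sge.qs[0], 3, 0, buf);
 *	if (len == sizeof(struct rsp_desc))
 *		log(LOG_DEBUG, "got response descriptor 0 (%d bytes)\n", len);
 *
 * On failure t3_get_desc() returns EINVAL rather than a descriptor size.
 */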