1/**************************************************************************
2
3Copyright (c) 2007, Chelsio Inc.
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
15
16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26POSSIBILITY OF SUCH DAMAGE.
27
28***************************************************************************/
29#define DEBUG_BUFRING
30
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_sge.c 182679 2008-09-02 07:47:14Z kmacy $");
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/kernel.h>
38#include <sys/module.h>
39#include <sys/bus.h>
40#include <sys/conf.h>
41#include <machine/bus.h>
42#include <machine/resource.h>
43#include <sys/bus_dma.h>
44#include <sys/rman.h>
45#include <sys/queue.h>
46#include <sys/sysctl.h>
47#include <sys/taskqueue.h>
48
49#include <sys/proc.h>
50#include <sys/sbuf.h>
51#include <sys/sched.h>
52#include <sys/smp.h>
53#include <sys/systm.h>
54#include <sys/syslog.h>
55
56#include <netinet/in_systm.h>
57#include <netinet/in.h>
58#include <netinet/ip.h>
59#include <netinet/tcp.h>
60
61#include <dev/pci/pcireg.h>
62#include <dev/pci/pcivar.h>
63
64#include <vm/vm.h>
65#include <vm/pmap.h>
66
67#ifdef CONFIG_DEFINED
68#include <cxgb_include.h>
69#include <sys/mvec.h>
70#else
71#include <dev/cxgb/cxgb_include.h>
72#include <dev/cxgb/sys/mvec.h>
73#endif
74
75int txq_fills = 0;
76/*
77 * XXX don't re-enable this until TOE stops assuming
78 * we have an m_ext
79 */
80static int recycle_enable = 0;
81extern int cxgb_txq_buf_ring_size;
82int cxgb_cached_allocations;
83int cxgb_cached;
84int cxgb_ext_freed = 0;
85int cxgb_ext_inited = 0;
86int fl_q_size = 0;
87int jumbo_q_size = 0;
88
89extern int cxgb_use_16k_clusters;
90extern int cxgb_pcpu_cache_enable;
91extern int nmbjumbo4;
92extern int nmbjumbo9;
93extern int nmbjumbo16;
94
95
96
97
98#define USE_GTS 0
99
100#define SGE_RX_SM_BUF_SIZE 1536
101#define SGE_RX_DROP_THRES 16
102#define SGE_RX_COPY_THRES 128
103
104/*
105 * Period of the Tx buffer reclaim timer. This timer does not need to run
106 * frequently as Tx buffers are usually reclaimed by new Tx packets.
107 */
108#define TX_RECLAIM_PERIOD (hz >> 1)
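/*
 * hz >> 1 ticks is half a second by construction; with the common
 * default of hz = 1000 that works out to 500 callout ticks between
 * reclaim passes.
 */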
109
110/*
111 * Values for sge_txq.flags
112 */
113enum {
114 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
115 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
116};
117
118struct tx_desc {
119 uint64_t flit[TX_DESC_FLITS];
120} __packed;
121
122struct rx_desc {
123 uint32_t addr_lo;
124 uint32_t len_gen;
125 uint32_t gen2;
126 uint32_t addr_hi;
127} __packed;
128
129struct rsp_desc { /* response queue descriptor */
130 struct rss_header rss_hdr;
131 uint32_t flags;
132 uint32_t len_cq;
133 uint8_t imm_data[47];
134 uint8_t intr_gen;
135} __packed;
136
137#define RX_SW_DESC_MAP_CREATED (1 << 0)
138#define TX_SW_DESC_MAP_CREATED (1 << 1)
139#define RX_SW_DESC_INUSE (1 << 3)
140#define TX_SW_DESC_MAPPED (1 << 4)
141
142#define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0)
143#define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP)
144#define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP)
145#define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
146
147struct tx_sw_desc { /* SW state per Tx descriptor */
148 struct mbuf_iovec mi;
149 bus_dmamap_t map;
150 int flags;
151};
152
153struct rx_sw_desc { /* SW state per Rx descriptor */
154 caddr_t rxsd_cl;
155 caddr_t data;
156 bus_dmamap_t map;
157 int flags;
158};
159
160struct txq_state {
161 unsigned int compl;
162 unsigned int gen;
163 unsigned int pidx;
164};
165
166struct refill_fl_cb_arg {
167 int error;
168 bus_dma_segment_t seg;
169 int nseg;
170};
171
172/*
173 * Maps a number of flits to the number of Tx descriptors that can hold them.
174 * The formula is
175 *
176 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
177 *
178 * HW allows up to 4 descriptors to be combined into a WR.
179 */
180static uint8_t flit_desc_map[] = {
181 0,
182#if SGE_NUM_GENBITS == 1
183 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
184 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
185 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
186 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
187#elif SGE_NUM_GENBITS == 2
188 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
189 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
190 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
191 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
192#else
193# error "SGE_NUM_GENBITS must be 1 or 2"
194#endif
195};
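/*
 * Reading the table above: with SGE_NUM_GENBITS == 2, for example, 15
 * flits still fit in one descriptor, 16 flits spill into a second one
 * (flit_desc_map[16] == 2) and 30 flits need a third.  The table simply
 * caches the integer-division formula quoted above so the transmit path
 * never has to divide.
 */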
196
197
198int cxgb_debug = 0;
199
200static void sge_timer_cb(void *arg);
201static void sge_timer_reclaim(void *arg, int ncount);
202static void sge_txq_reclaim_handler(void *arg, int ncount);
203
204/**
205 * reclaim_completed_tx_ - reclaims completed Tx descriptors
206 * @q: the Tx queue to reclaim completed descriptors from
207 * @reclaim_min: do nothing unless at least this many descriptors can be reclaimed
208 *
209 * Reclaims Tx descriptors that the SGE has indicated it has processed,
210 * and frees the associated buffers if possible. Called with the Tx
211 * queue's lock held.
212 */
213static __inline int
214reclaim_completed_tx_(struct sge_txq *q, int reclaim_min)
215{
216 int reclaim = desc_reclaimable(q);
217
218 if (reclaim < reclaim_min)
219 return (0);
220
221 mtx_assert(&q->lock, MA_OWNED);
222 if (reclaim > 0) {
223 t3_free_tx_desc(q, reclaim);
224 q->cleaned += reclaim;
225 q->in_use -= reclaim;
226 }
227 return (reclaim);
228}
229
230/**
231 * should_restart_tx - are there enough resources to restart a Tx queue?
232 * @q: the Tx queue
233 *
234 * Checks if there are enough descriptors to restart a suspended Tx queue.
235 */
236static __inline int
237should_restart_tx(const struct sge_txq *q)
238{
239 unsigned int r = q->processed - q->cleaned;
240
241 return q->in_use - r < (q->size >> 1);
242}
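/*
 * Example: for a 1024-entry Tx queue the suspended queue is restarted
 * once fewer than 512 descriptors remain outstanding in HW, where
 * "outstanding" is q->in_use minus the descriptors the SGE has already
 * processed but SW has not yet cleaned (q->processed - q->cleaned).
 */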
243
244/**
245 * t3_sge_init - initialize SGE
246 * @adap: the adapter
247 * @p: the SGE parameters
248 *
249 * Performs SGE initialization needed every time after a chip reset.
250 * We do not initialize any of the queue sets here; instead the driver
251 * top-level must request those individually. We also do not enable DMA
252 * here; that should be done after the queues have been set up.
253 */
254void
255t3_sge_init(adapter_t *adap, struct sge_params *p)
256{
257 u_int ctrl, ups;
258
259 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
260
261 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
262 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
263 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
264 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
265#if SGE_NUM_GENBITS == 1
266 ctrl |= F_EGRGENCTRL;
267#endif
268 if (adap->params.rev > 0) {
269 if (!(adap->flags & (USING_MSIX | USING_MSI)))
270 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
271 }
272 t3_write_reg(adap, A_SG_CONTROL, ctrl);
273 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
274 V_LORCQDRBTHRSH(512));
275 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
276 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
277 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
278 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
279 adap->params.rev < T3_REV_C ? 1000 : 500);
280 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
281 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
282 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
283 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
284 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
285}
286
287
288/**
289 * sgl_len - calculates the size of an SGL of the given capacity
290 * @n: the number of SGL entries
291 *
292 * Calculates the number of flits needed for a scatter/gather list that
293 * can hold the given number of entries.
294 */
295static __inline unsigned int
296sgl_len(unsigned int n)
297{
298 return ((3 * n) / 2 + (n & 1));
299}
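/*
 * Two SGL entries share a struct sg_ent (one flit holding both 32-bit
 * lengths plus two 64-bit address flits), so the count grows by three
 * flits per pair and by two for a trailing odd entry.  A compiled-out,
 * illustrative sketch of the resulting values:
 */
#ifdef notdef
static void
sgl_len_example(void)
{
	KASSERT(sgl_len(1) == 2, ("single entry: len flit + addr flit"));
	KASSERT(sgl_len(2) == 3, ("a pair shares one length flit"));
	KASSERT(sgl_len(3) == 5, ("odd entry adds two more flits"));
	KASSERT(sgl_len(4) == 6, ("two full pairs"));
}
#endif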
300
301/**
302 * get_imm_packet - return the next ingress packet buffer from a response
303 * @resp: the response descriptor containing the packet data
304 *
305 * Return a packet containing the immediate data of the given response.
306 */
307static int
308get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
309{
310
311 m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
312 m->m_ext.ext_buf = NULL;
313 m->m_ext.ext_type = 0;
314 memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
315 return (0);
316}
317
318static __inline u_int
319flits_to_desc(u_int n)
320{
321 return (flit_desc_map[n]);
322}
323
324#define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
325 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
326 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
327 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
328 F_HIRCQPARITYERROR)
329#define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
330#define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
331 F_RSPQDISABLED)
332
333/**
334 * t3_sge_err_intr_handler - SGE async event interrupt handler
335 * @adapter: the adapter
336 *
337 * Interrupt handler for SGE asynchronous (non-data) events.
338 */
339void
340t3_sge_err_intr_handler(adapter_t *adapter)
341{
342 unsigned int v, status;
343
344 status = t3_read_reg(adapter, A_SG_INT_CAUSE);
345 if (status & SGE_PARERR)
346 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
347 status & SGE_PARERR);
348 if (status & SGE_FRAMINGERR)
349 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
350 status & SGE_FRAMINGERR);
351 if (status & F_RSPQCREDITOVERFOW)
352 CH_ALERT(adapter, "SGE response queue credit overflow\n");
353
354 if (status & F_RSPQDISABLED) {
355 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
356
357 CH_ALERT(adapter,
358 "packet delivered to disabled response queue (0x%x)\n",
359 (v >> S_RSPQ0DISABLED) & 0xff);
360 }
361
362 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
363 if (status & SGE_FATALERR)
364 t3_fatal_err(adapter);
365}
366
367void
368t3_sge_prep(adapter_t *adap, struct sge_params *p)
369{
370 int i, nqsets;
371
372 nqsets = min(SGE_QSETS, mp_ncpus*4);
373
374 fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
375
376 while (!powerof2(fl_q_size))
377 fl_q_size--;
378#if __FreeBSD_version > 800000
379 if (cxgb_use_16k_clusters)
380 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
381 else
382 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
383#else
384 jumbo_q_size = min(nmbjumbo4/(3*nqsets), JUMBO_Q_SIZE);
385#endif
386 while (!powerof2(jumbo_q_size))
387 jumbo_q_size--;
388
389 /* XXX Does ETHER_ALIGN need to be accounted for here? */
390 p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data);
391
392 for (i = 0; i < SGE_QSETS; ++i) {
393 struct qset_params *q = p->qset + i;
394
395 if (adap->params.nports > 2) {
396 q->coalesce_usecs = 50;
397 } else {
398#ifdef INVARIANTS
399 q->coalesce_usecs = 10;
400#else
401 q->coalesce_usecs = 5;
402#endif
403 }
404 q->polling = 0;
405 q->rspq_size = RSPQ_Q_SIZE;
406 q->fl_size = fl_q_size;
407 q->jumbo_size = jumbo_q_size;
408 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
409 q->txq_size[TXQ_OFLD] = 1024;
410 q->txq_size[TXQ_CTRL] = 256;
411 q->cong_thres = 0;
412 }
413}
414
415int
416t3_sge_alloc(adapter_t *sc)
417{
418
419 /* The parent tag. */
420 if (bus_dma_tag_create( NULL, /* parent */
421 1, 0, /* algnmnt, boundary */
422 BUS_SPACE_MAXADDR, /* lowaddr */
423 BUS_SPACE_MAXADDR, /* highaddr */
424 NULL, NULL, /* filter, filterarg */
425 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
426 BUS_SPACE_UNRESTRICTED, /* nsegments */
427 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
428 0, /* flags */
429 NULL, NULL, /* lock, lockarg */
430 &sc->parent_dmat)) {
431 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
432 return (ENOMEM);
433 }
434
435 /*
436 * DMA tag for normal sized RX frames
437 */
438 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
439 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
440 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
441 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
442 return (ENOMEM);
443 }
444
445 /*
446 * DMA tag for jumbo sized RX frames.
447 */
448 if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
449 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
450 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
451 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
452 return (ENOMEM);
453 }
454
455 /*
456 * DMA tag for TX frames.
457 */
458 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
459 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
460 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
461 NULL, NULL, &sc->tx_dmat)) {
462 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
463 return (ENOMEM);
464 }
465
466 return (0);
467}
468
469int
470t3_sge_free(struct adapter * sc)
471{
472
473 if (sc->tx_dmat != NULL)
474 bus_dma_tag_destroy(sc->tx_dmat);
475
476 if (sc->rx_jumbo_dmat != NULL)
477 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
478
479 if (sc->rx_dmat != NULL)
480 bus_dma_tag_destroy(sc->rx_dmat);
481
482 if (sc->parent_dmat != NULL)
483 bus_dma_tag_destroy(sc->parent_dmat);
484
485 return (0);
486}
487
488void
489t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
490{
491
492 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
493 qs->rspq.polling = 0 /* p->polling */;
494}
495
496#if !defined(__i386__) && !defined(__amd64__)
497static void
498refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
499{
500 struct refill_fl_cb_arg *cb_arg = arg;
501
502 cb_arg->error = error;
503 cb_arg->seg = segs[0];
504 cb_arg->nseg = nseg;
505
506}
507#endif
508/**
509 * refill_fl - refill an SGE free-buffer list
510 * @sc: the controller softc
511 * @q: the free-list to refill
512 * @n: the number of new buffers to allocate
513 *
514 * (Re)populate an SGE free-buffer list with up to @n new packet buffers.
515 * The caller must ensure that @n does not exceed the queue's capacity.
516 */
517static void
518refill_fl(adapter_t *sc, struct sge_fl *q, int n)
519{
520 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
521 struct rx_desc *d = &q->desc[q->pidx];
522 struct refill_fl_cb_arg cb_arg;
523 caddr_t cl;
524 int err, count = 0;
525 int header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
526
527 cb_arg.error = 0;
528 while (n--) {
529 /*
530 * We only allocate a cluster, mbuf allocation happens after rx
531 */
532 if ((cl = cxgb_cache_get(q->zone)) == NULL) {
533 log(LOG_WARNING, "Failed to allocate cluster\n");
534 goto done;
535 }
536
537 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
538 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
539 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
540 uma_zfree(q->zone, cl);
541 goto done;
542 }
543 sd->flags |= RX_SW_DESC_MAP_CREATED;
544 }
545#if !defined(__i386__) && !defined(__amd64__)
546 err = bus_dmamap_load(q->entry_tag, sd->map,
547 cl + header_size, q->buf_size,
548 refill_fl_cb, &cb_arg, 0);
549
550 if (err != 0 || cb_arg.error) {
551 log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
552 /*
553 * XXX free cluster
554 */
555 return;
556 }
557#else
558 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)(cl + header_size));
559#endif
560 sd->flags |= RX_SW_DESC_INUSE;
561 sd->rxsd_cl = cl;
562 sd->data = cl + header_size;
563 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
564 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
565 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
566 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
567
568 d++;
569 sd++;
570
571 if (++q->pidx == q->size) {
572 q->pidx = 0;
573 q->gen ^= 1;
574 sd = q->sdesc;
575 d = q->desc;
576 }
577 q->credits++;
578 count++;
579 }
580
581done:
582 if (count)
583 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
584}
585
586
587/**
588 * free_rx_bufs - free the Rx buffers on an SGE free list
589 * @sc: the controller softc
590 * @q: the SGE free list to clean up
591 *
592 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
593 * this queue should be stopped before calling this function.
594 */
595static void
596free_rx_bufs(adapter_t *sc, struct sge_fl *q)
597{
598 u_int cidx = q->cidx;
599
600 while (q->credits--) {
601 struct rx_sw_desc *d = &q->sdesc[cidx];
602
603 if (d->flags & RX_SW_DESC_INUSE) {
604 bus_dmamap_unload(q->entry_tag, d->map);
605 bus_dmamap_destroy(q->entry_tag, d->map);
606 uma_zfree(q->zone, d->rxsd_cl);
607 }
608 d->rxsd_cl = NULL;
609 if (++cidx == q->size)
610 cidx = 0;
611 }
612}
613
614static __inline void
615__refill_fl(adapter_t *adap, struct sge_fl *fl)
616{
617 refill_fl(adap, fl, min(16U, fl->size - fl->credits));
618}
619
620static __inline void
621__refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
622{
623 if ((fl->size - fl->credits) < max)
624 refill_fl(adap, fl, min(max, fl->size - fl->credits));
625}
626
627void
628refill_fl_service(adapter_t *adap, struct sge_fl *fl)
629{
630 __refill_fl_lt(adap, fl, 512);
631}
632
633/**
634 * recycle_rx_buf - recycle a receive buffer
635 * @adapter: the adapter
636 * @q: the SGE free list
637 * @idx: index of buffer to recycle
638 *
639 * Recycles the specified buffer on the given free list by adding it at
640 * the next available slot on the list.
641 */
642static void
643recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
644{
645 struct rx_desc *from = &q->desc[idx];
646 struct rx_desc *to = &q->desc[q->pidx];
647
648 q->sdesc[q->pidx] = q->sdesc[idx];
649 to->addr_lo = from->addr_lo; // already big endian
650 to->addr_hi = from->addr_hi; // likewise
651 wmb();
652 to->len_gen = htobe32(V_FLD_GEN1(q->gen));
653 to->gen2 = htobe32(V_FLD_GEN2(q->gen));
654 q->credits++;
655
656 if (++q->pidx == q->size) {
657 q->pidx = 0;
658 q->gen ^= 1;
659 }
660 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
661}
662
663static void
664alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
665{
666 uint32_t *addr;
667
668 addr = arg;
669 *addr = segs[0].ds_addr;
670}
671
672static int
673alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
674 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
675 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
676{
677 size_t len = nelem * elem_size;
678 void *s = NULL;
679 void *p = NULL;
680 int err;
681
682 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
683 BUS_SPACE_MAXADDR_32BIT,
684 BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
685 len, 0, NULL, NULL, tag)) != 0) {
686 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
687 return (ENOMEM);
688 }
689
690 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
691 map)) != 0) {
692 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
693 return (ENOMEM);
694 }
695
696 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
697 bzero(p, len);
698 *(void **)desc = p;
699
700 if (sw_size) {
701 len = nelem * sw_size;
702 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
703 *(void **)sdesc = s;
704 }
705 if (parent_entry_tag == NULL)
706 return (0);
707
708 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
709 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
710 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
711 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
712 NULL, NULL, entry_tag)) != 0) {
713 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
714 return (ENOMEM);
715 }
716 return (0);
717}
718
719static void
720sge_slow_intr_handler(void *arg, int ncount)
721{
722 adapter_t *sc = arg;
723
724 t3_slow_intr_handler(sc);
725}
726
727/**
728 * sge_timer_cb - perform periodic maintenance of an SGE qset
729 * @data: the SGE queue set to maintain
730 *
731 * Runs periodically from a timer to perform maintenance of an SGE queue
732 * set. It performs the following tasks:
733 *
734 * a) Cleans up any completed Tx descriptors that may still be pending.
735 * Normal descriptor cleanup happens when new packets are added to a Tx
736 * queue so this timer is relatively infrequent and does any cleanup only
737 * if the Tx queue has not seen any new packets in a while. We make a
738 * best effort attempt to reclaim descriptors, in that we don't wait
739 * around if we cannot get a queue's lock (which most likely is because
740 * someone else is queueing new packets and so will also handle the clean
741 * up). Since control queues use immediate data exclusively we don't
742 * bother cleaning them up here.
743 *
744 * b) Replenishes Rx queues that have run out due to memory shortage.
745 * Normally new Rx buffers are added when existing ones are consumed but
746 * when out of memory a queue can become empty. We try to add only a few
747 * buffers here; the queue will be replenished fully as these new buffers
748 * are used up once the memory shortage has subsided.
749 *
750 * c) Return coalesced response queue credits in case a response queue is
751 * starved.
752 *
753 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell
754 * fifo overflows and the FW doesn't implement any recovery scheme yet.
755 */
756static void
757sge_timer_cb(void *arg)
758{
759 adapter_t *sc = arg;
760#ifndef IFNET_MULTIQUEUE
761 struct port_info *pi;
762 struct sge_qset *qs;
763 struct sge_txq *txq;
764 int i, j;
765 int reclaim_ofl, refill_rx;
766
767 for (i = 0; i < sc->params.nports; i++)
768 for (j = 0; j < sc->port[i].nqsets; j++) {
769 qs = &sc->sge.qs[i + j];
770 txq = &qs->txq[0];
771 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
772 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
773 (qs->fl[1].credits < qs->fl[1].size));
774 if (reclaim_ofl || refill_rx) {
775 pi = &sc->port[i];
776 taskqueue_enqueue(pi->tq, &pi->timer_reclaim_task);
777 break;
778 }
779 }
780#endif
781 if (sc->params.nports > 2) {
782 int i;
783
784 for_each_port(sc, i) {
785 struct port_info *pi = &sc->port[i];
786
787 t3_write_reg(sc, A_SG_KDOORBELL,
788 F_SELEGRCNTX |
789 (FW_TUNNEL_SGEEC_START + pi->first_qset));
790 }
791 }
792 if (sc->open_device_map != 0)
793 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
794}
795
796/*
797 * This is meant to be a catch-all function to keep sge state private
798 * to sge.c
799 *
800 */
801int
802t3_sge_init_adapter(adapter_t *sc)
803{
804 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
805 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
806 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
807 mi_init();
808 cxgb_cache_init();
809 return (0);
810}
811
812int
813t3_sge_reset_adapter(adapter_t *sc)
814{
815 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
816 return (0);
817}
818
819int
820t3_sge_init_port(struct port_info *pi)
821{
822 TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
823 return (0);
824}
825
826void
827t3_sge_deinit_sw(adapter_t *sc)
828{
829
830 mi_deinit();
831}
832
833/**
834 * refill_rspq - replenish an SGE response queue
835 * @adapter: the adapter
836 * @q: the response queue to replenish
837 * @credits: how many new responses to make available
838 *
839 * Replenishes a response queue by making the supplied number of responses
840 * available to HW.
841 */
842static __inline void
843refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
844{
845
846 /* mbufs are allocated on demand when a rspq entry is processed. */
847 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
848 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
849}
850
851static __inline void
852sge_txq_reclaim_(struct sge_txq *txq, int force)
853{
854
855 if (desc_reclaimable(txq) < 16)
856 return;
857 if (mtx_trylock(&txq->lock) == 0)
858 return;
859 reclaim_completed_tx_(txq, 16);
860 mtx_unlock(&txq->lock);
861
862}
863
864static void
865sge_txq_reclaim_handler(void *arg, int ncount)
866{
867 struct sge_txq *q = arg;
868
869 sge_txq_reclaim_(q, TRUE);
870}
871
872
873
874static void
875sge_timer_reclaim(void *arg, int ncount)
876{
877 struct port_info *pi = arg;
878 int i, nqsets = pi->nqsets;
879 adapter_t *sc = pi->adapter;
880 struct sge_qset *qs;
881 struct sge_txq *txq;
882 struct mtx *lock;
883
884#ifdef IFNET_MULTIQUEUE
885 panic("%s should not be called with multiqueue support\n", __FUNCTION__);
886#endif
887 for (i = 0; i < nqsets; i++) {
888 qs = &sc->sge.qs[i];
889
890 txq = &qs->txq[TXQ_OFLD];
891 sge_txq_reclaim_(txq, FALSE);
892
893 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
894 &sc->sge.qs[0].rspq.lock;
895
896 if (mtx_trylock(lock)) {
897 /* XXX currently assume that we are *NOT* polling */
898 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
899
900 if (qs->fl[0].credits < qs->fl[0].size - 16)
901 __refill_fl(sc, &qs->fl[0]);
902 if (qs->fl[1].credits < qs->fl[1].size - 16)
903 __refill_fl(sc, &qs->fl[1]);
904
905 if (status & (1 << qs->rspq.cntxt_id)) {
906 if (qs->rspq.credits) {
907 refill_rspq(sc, &qs->rspq, 1);
908 qs->rspq.credits--;
909 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
910 1 << qs->rspq.cntxt_id);
911 }
912 }
913 mtx_unlock(lock);
914 }
915 }
916}
917
918/**
919 * init_qset_cntxt - initialize an SGE queue set context info
920 * @qs: the queue set
921 * @id: the queue set id
922 *
923 * Initializes the TIDs and context ids for the queues of a queue set.
924 */
925static void
926init_qset_cntxt(struct sge_qset *qs, u_int id)
927{
928
929 qs->rspq.cntxt_id = id;
930 qs->fl[0].cntxt_id = 2 * id;
931 qs->fl[1].cntxt_id = 2 * id + 1;
932 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
933 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
934 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
935 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
936 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
937
938 mbufq_init(&qs->txq[TXQ_ETH].sendq);
939 mbufq_init(&qs->txq[TXQ_OFLD].sendq);
940 mbufq_init(&qs->txq[TXQ_CTRL].sendq);
941}
942
943
944static void
945txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
946{
947 txq->in_use += ndesc;
948 /*
949 * XXX we don't handle stopping of queue
950 * presumably start handles this when we bump against the end
951 */
952 txqs->gen = txq->gen;
953 txq->unacked += ndesc;
954 txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
955 txq->unacked &= 31;
956 txqs->pidx = txq->pidx;
957 txq->pidx += ndesc;
958#ifdef INVARIANTS
959 if (((txqs->pidx > txq->cidx) &&
960 (txq->pidx < txqs->pidx) &&
961 (txq->pidx >= txq->cidx)) ||
962 ((txqs->pidx < txq->cidx) &&
963 (txq->pidx >= txq-> cidx)) ||
964 ((txqs->pidx < txq->cidx) &&
965 (txq->cidx < txqs->pidx)))
966 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
967 txqs->pidx, txq->pidx, txq->cidx);
968#endif
969 if (txq->pidx >= txq->size) {
970 txq->pidx -= txq->size;
971 txq->gen ^= 1;
972 }
973
974}
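/*
 * The compl/unacked arithmetic above requests a completion roughly once
 * every 32 descriptors: unacked accumulates ndesc until bit 5 (value 32)
 * becomes set, at which point txqs->compl carries the WR completion flag
 * for this work request and the counter is folded back into its low
 * five bits.
 */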
975
976/**
977 * calc_tx_descs - calculate the number of Tx descriptors for a packet
978 * @m: the packet mbufs
979 * @nsegs: the number of segments
980 *
981 * Returns the number of Tx descriptors needed for the given Ethernet
982 * packet. Ethernet packets require addition of WR and CPL headers.
983 */
984static __inline unsigned int
985calc_tx_descs(const struct mbuf *m, int nsegs)
986{
987 unsigned int flits;
988
989 if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
990 return 1;
991
992 flits = sgl_len(nsegs) + 2;
993#ifdef TSO_SUPPORTED
994 if (m->m_pkthdr.csum_flags & CSUM_TSO)
995 flits++;
996#endif
997 return flits_to_desc(flits);
998}
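/*
 * Worked example: a packet mapped into four DMA segments needs
 * sgl_len(4) + 2 = 8 flits (WR header, CPL_TX_PKT header and the SGL),
 * which flits_to_desc() still maps onto a single descriptor.  TSO adds
 * one more flit for the LSO header before the conversion.
 */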
999
1000static unsigned int
1001busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
1002 struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
1003{
1004 struct mbuf *m0;
1005 int err, pktlen, pass = 0;
1006
1007retry:
1008 err = 0;
1009 m0 = *m;
1010 pktlen = m0->m_pkthdr.len;
1011#if defined(__i386__) || defined(__amd64__)
1012 if (busdma_map_sg_collapse(m, segs, nsegs) == 0) {
1013 goto done;
1014 } else
1015#endif
1016 err = bus_dmamap_load_mbuf_sg(txq->entry_tag, txsd->map, m0, segs, nsegs, 0);
1017
1018 if (err == 0) {
1019 goto done;
1020 }
1021 if (err == EFBIG && pass == 0) {
1022 pass = 1;
1023 /* Too many segments, try to defrag */
1024 m0 = m_defrag(m0, M_DONTWAIT);
1025 if (m0 == NULL) {
1026 m_freem(*m);
1027 *m = NULL;
1028 return (ENOBUFS);
1029 }
1030 *m = m0;
1031 goto retry;
1032 } else if (err == ENOMEM) {
1033 return (err);
1034 } else if (err) {
1035 if (cxgb_debug)
1036 printf("map failure err=%d pktlen=%d\n", err, pktlen);
1037 m_freem(m0);
1038 *m = NULL;
1039 return (err);
1040 }
1041done:
1042#if !defined(__i386__) && !defined(__amd64__)
1043 bus_dmamap_sync(txq->entry_tag, txsd->map, BUS_DMASYNC_PREWRITE);
1044#endif
1045 txsd->flags |= TX_SW_DESC_MAPPED;
1046
1047 return (0);
1048}
1049
1050/**
1051 * make_sgl - populate a scatter/gather list for a packet
1052 * @sgp: the SGL to populate
1053 * @segs: the packet dma segments
1054 * @nsegs: the number of segments
1055 *
1056 * Generates a scatter/gather list for the buffers that make up a packet;
1057 * the SGL is laid out in 8-byte flits. The caller must size the SGL
1058 * appropriately.
1059 */
1060static __inline void
1061make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1062{
1063 int i, idx;
1064
1065 for (idx = 0, i = 0; i < nsegs; i++) {
1066 /*
1067 * firmware doesn't like empty segments
1068 */
1069 if (segs[i].ds_len == 0)
1070 continue;
1071 if (i && idx == 0)
1072 ++sgp;
1073
1074 sgp->len[idx] = htobe32(segs[i].ds_len);
1075 sgp->addr[idx] = htobe64(segs[i].ds_addr);
1076 idx ^= 1;
1077 }
1078
1079 if (idx) {
1080 sgp->len[idx] = 0;
1081 sgp->addr[idx] = 0;
1082 }
1083}
1084
1085/**
1086 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1087 * @adap: the adapter
1088 * @q: the Tx queue
1089 *
1090 * Ring the doorbell if a Tx queue is asleep. There is a natural race
1091 * where the HW may go to sleep just after we checked; in that case the
1092 * interrupt handler will detect the outstanding TX packet
1093 * and ring the doorbell for us.
1094 *
1095 * When GTS is disabled we unconditionally ring the doorbell.
1096 */
1097static __inline void
1098check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
1099{
1100#if USE_GTS
1101 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1102 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1103 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1104#ifdef T3_TRACE
1105 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1106 q->cntxt_id);
1107#endif
1108 t3_write_reg(adap, A_SG_KDOORBELL,
1109 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1110 }
1111#else
1112 wmb(); /* write descriptors before telling HW */
1113 t3_write_reg(adap, A_SG_KDOORBELL,
1114 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1115#endif
1116}
1117
1118static __inline void
1119wr_gen2(struct tx_desc *d, unsigned int gen)
1120{
1121#if SGE_NUM_GENBITS == 2
1122 d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
1123#endif
1124}
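/*
 * With two generation bits the last flit of every descriptor carries a
 * copy of the generation value, letting the SGE recognize a descriptor
 * that has been written out completely; the value itself simply toggles
 * each time the producer index wraps around the ring (see txq_prod()).
 */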
1125
1126/**
1127 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
1128 * @ndesc: number of Tx descriptors spanned by the SGL
1129 * @txd: first Tx descriptor to be written
1130 * @txqs: txq state (generation and producer index)
1131 * @txq: the SGE Tx queue
1132 * @sgl: the SGL
1133 * @flits: number of flits to the start of the SGL in the first descriptor
1134 * @sgl_flits: the SGL size in flits
1135 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
1136 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
1137 *
1138 * Write a work request header and an associated SGL. If the SGL is
1139 * small enough to fit into one Tx descriptor it has already been written
1140 * and we just need to write the WR header. Otherwise we distribute the
1141 * SGL across the number of descriptors it spans.
1142 */
1143static void
1144write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1145 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1146 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1147{
1148
1149 struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1150 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
1151
1152 if (__predict_true(ndesc == 1)) {
1153 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1154 V_WR_SGLSFLT(flits)) | wr_hi;
1155 wmb();
1156 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1157 V_WR_GEN(txqs->gen)) | wr_lo;
1158 /* XXX gen? */
1159 wr_gen2(txd, txqs->gen);
1160
1161 } else {
1162 unsigned int ogen = txqs->gen;
1163 const uint64_t *fp = (const uint64_t *)sgl;
1164 struct work_request_hdr *wp = wrp;
1165
1166 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1167 V_WR_SGLSFLT(flits)) | wr_hi;
1168
1169 while (sgl_flits) {
1170 unsigned int avail = WR_FLITS - flits;
1171
1172 if (avail > sgl_flits)
1173 avail = sgl_flits;
1174 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1175 sgl_flits -= avail;
1176 ndesc--;
1177 if (!sgl_flits)
1178 break;
1179
1180 fp += avail;
1181 txd++;
1182 txsd++;
1183 if (++txqs->pidx == txq->size) {
1184 txqs->pidx = 0;
1185 txqs->gen ^= 1;
1186 txd = txq->desc;
1187 txsd = txq->sdesc;
1188 }
1189
1190 /*
1191 * when the head of the mbuf chain
1192 * is freed all clusters will be freed
1193 * with it
1194 */
1195 KASSERT(txsd->mi.mi_base == NULL,
1196 ("overwriting valid entry mi_base==%p", txsd->mi.mi_base));
1197 wrp = (struct work_request_hdr *)txd;
1198 wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1199 V_WR_SGLSFLT(1)) | wr_hi;
1200 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1201 sgl_flits + 1)) |
1202 V_WR_GEN(txqs->gen)) | wr_lo;
1203 wr_gen2(txd, txqs->gen);
1204 flits = 1;
1205 }
1206 wrp->wr_hi |= htonl(F_WR_EOP);
1207 wmb();
1208 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1209 wr_gen2((struct tx_desc *)wp, ogen);
1210 }
1211}
1212
1213/* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
1214#define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
1215
1216#ifdef VLAN_SUPPORTED
1217#define GET_VTAG(cntrl, m) \
1218do { \
1219 if ((m)->m_flags & M_VLANTAG) \
1220 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
1221} while (0)
1222
1223#define GET_VTAG_MI(cntrl, mi) \
1224do { \
1225 if ((mi)->mi_flags & M_VLANTAG) \
1226 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((mi)->mi_ether_vtag); \
1227} while (0)
1228#else
1229#define GET_VTAG(cntrl, m)
1230#define GET_VTAG_MI(cntrl, m)
1231#endif
1232
1233int
1234t3_encap(struct sge_qset *qs, struct mbuf **m, int count)
1235{
1236 adapter_t *sc;
1237 struct mbuf *m0;
1238 struct sge_txq *txq;
1239 struct txq_state txqs;
1240 struct port_info *pi;
1241 unsigned int ndesc, flits, cntrl, mlen;
1242 int err, nsegs, tso_info = 0;
1243
1244 struct work_request_hdr *wrp;
1245 struct tx_sw_desc *txsd;
1246 struct sg_ent *sgp, *sgl;
1247 uint32_t wr_hi, wr_lo, sgl_flits;
1248 bus_dma_segment_t segs[TX_MAX_SEGS];
1249
1250 struct tx_desc *txd;
1251 struct mbuf_vec *mv;
1252 struct mbuf_iovec *mi;
1253
1254 DPRINTF("t3_encap cpu=%d ", curcpu);
1255
1256 mi = NULL;
1257 pi = qs->port;
1258 sc = pi->adapter;
1259 txq = &qs->txq[TXQ_ETH];
1260 txd = &txq->desc[txq->pidx];
1261 txsd = &txq->sdesc[txq->pidx];
1262 sgl = txq->txq_sgl;
1263 m0 = *m;
1264
1265 DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset);
1266 DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan);
1267 if (cxgb_debug)
1268 printf("mi_base=%p cidx=%d pidx=%d\n\n", txsd->mi.mi_base, txq->cidx, txq->pidx);
1269
1270 mtx_assert(&txq->lock, MA_OWNED);
1271 cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1272/*
1273 * XXX need to add VLAN support for 6.x
1274 */
1275#ifdef VLAN_SUPPORTED
1276 if (m0->m_pkthdr.csum_flags & (CSUM_TSO))
1277 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1278#endif
1279 KASSERT(txsd->mi.mi_base == NULL,
1280 ("overwriting valid entry mi_base==%p", txsd->mi.mi_base));
1281 if (count > 1) {
1282 panic("count > 1 not supported in CVS\n");
1283 if ((err = busdma_map_sg_vec(m, &m0, segs, count)))
1284 return (err);
1285 nsegs = count;
1286 } else if ((err = busdma_map_sg_collapse(&m0, segs, &nsegs))) {
1287 if (cxgb_debug)
1288 printf("failed ... err=%d\n", err);
1289 return (err);
1290 }
1291 KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d count=%d", nsegs, count));
1292
1293 if (!(m0->m_pkthdr.len <= PIO_LEN)) {
1294 mi_collapse_mbuf(&txsd->mi, m0);
1295 mi = &txsd->mi;
1296 }
1297 if (count > 1) {
1298 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
1299 int i, fidx;
1300 struct mbuf_iovec *batchmi;
1301
1302 mv = mtomv(m0);
1303 batchmi = mv->mv_vec;
1304
1305 wrp = (struct work_request_hdr *)txd;
1306
1307 flits = count*2 + 1;
1308 txq_prod(txq, 1, &txqs);
1309
1310 for (fidx = 1, i = 0; i < count; i++, batchmi++, fidx += 2) {
1311 struct cpl_tx_pkt_batch_entry *cbe = &cpl_batch->pkt_entry[i];
1312
1313 cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1314 GET_VTAG_MI(cntrl, batchmi);
1315 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1316 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
1317 cntrl |= F_TXPKT_IPCSUM_DIS;
1318 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
1319 cntrl |= F_TXPKT_L4CSUM_DIS;
1320 cbe->cntrl = htonl(cntrl);
1321 cbe->len = htonl(batchmi->mi_len | 0x80000000);
1322 cbe->addr = htobe64(segs[i].ds_addr);
1323 txd->flit[fidx] |= htobe64(1 << 24);
1324 }
1325
1326 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1327 V_WR_SGLSFLT(flits)) | htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1328 wmb();
1329 wrp->wr_lo = htonl(V_WR_LEN(flits) |
1330 V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
1331 /* XXX gen? */
1332 wr_gen2(txd, txqs.gen);
1333 check_ring_tx_db(sc, txq);
1334
1335 return (0);
1336 } else if (tso_info) {
1337 int min_size = TCPPKTHDRSIZE, eth_type, tagged;
1338 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
1339 struct ip *ip;
1340 struct tcphdr *tcp;
1341 char *pkthdr;
1342
1343 txd->flit[2] = 0;
1344 GET_VTAG(cntrl, m0);
1345 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1346 hdr->cntrl = htonl(cntrl);
1347 mlen = m0->m_pkthdr.len;
1348 hdr->len = htonl(mlen | 0x80000000);
1349
1350 DPRINTF("tso buf len=%d\n", mlen);
1351
1352 tagged = m0->m_flags & M_VLANTAG;
1353 if (!tagged)
1354 min_size -= ETHER_VLAN_ENCAP_LEN;
1355
1356 if (__predict_false(mlen < min_size)) {
1357 printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
1358 m0, mlen, m0->m_pkthdr.tso_segsz,
1359 m0->m_pkthdr.csum_flags, m0->m_flags);
1360 panic("tx tso packet too small");
1361 }
1362
1363 /* Make sure that ether, ip, tcp headers are all in m0 */
1364 if (__predict_false(m0->m_len < min_size)) {
1365 m0 = m_pullup(m0, min_size);
1366 if (__predict_false(m0 == NULL)) {
1367 /* XXX panic probably an overreaction */
1368 panic("couldn't fit header into mbuf");
1369 }
1370 }
1371 pkthdr = m0->m_data;
1372
1373 if (tagged) {
1374 eth_type = CPL_ETH_II_VLAN;
1375 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
1376 ETHER_VLAN_ENCAP_LEN);
1377 } else {
1378 eth_type = CPL_ETH_II;
1379 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
1380 }
1381 tcp = (struct tcphdr *)((uint8_t *)ip +
1382 sizeof(*ip));
1383
1384 tso_info |= V_LSO_ETH_TYPE(eth_type) |
1385 V_LSO_IPHDR_WORDS(ip->ip_hl) |
1386 V_LSO_TCPHDR_WORDS(tcp->th_off);
1387 hdr->lso_info = htonl(tso_info);
1388
1389 if (__predict_false(mlen <= PIO_LEN)) {
1390 /* pkt not undersized but fits in PIO_LEN */
1391 printf("**5592 Fix** mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
1392 m0, mlen, m0->m_pkthdr.tso_segsz, m0->m_pkthdr.csum_flags, m0->m_flags);
1393 txq_prod(txq, 1, &txqs);
1394 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
1395 m_freem(m0);
1396 m0 = NULL;
1397 flits = (mlen + 7) / 8 + 3;
1398 hdr->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1399 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1400 F_WR_SOP | F_WR_EOP | txqs.compl);
1401 wmb();
1402 hdr->wr.wr_lo = htonl(V_WR_LEN(flits) |
1403 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1404
1405 wr_gen2(txd, txqs.gen);
1406 check_ring_tx_db(sc, txq);
1407 return (0);
1408 }
1409 flits = 3;
1410 } else {
1411 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
1412
1413 GET_VTAG(cntrl, m0);
1414 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1415 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
1416 cntrl |= F_TXPKT_IPCSUM_DIS;
1417 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
1418 cntrl |= F_TXPKT_L4CSUM_DIS;
1419 cpl->cntrl = htonl(cntrl);
1420 mlen = m0->m_pkthdr.len;
1421 cpl->len = htonl(mlen | 0x80000000);
1422
1423 if (mlen <= PIO_LEN) {
1424 txq_prod(txq, 1, &txqs);
1425 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1426 m_freem(m0);
1427 m0 = NULL;
1428 flits = (mlen + 7) / 8 + 2;
1429 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1430 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1431 F_WR_SOP | F_WR_EOP | txqs.compl);
1432 wmb();
1433 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
1434 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1435
1436 wr_gen2(txd, txqs.gen);
1437 check_ring_tx_db(sc, txq);
1438 DPRINTF("pio buf\n");
1439 return (0);
1440 }
1441 DPRINTF("regular buf\n");
1442 flits = 2;
1443 }
1444 wrp = (struct work_request_hdr *)txd;
1445
1446#ifdef nomore
1447 /*
1448 * XXX need to move into one of the helper routines above
1449 *
1450 */
1451 if ((err = busdma_map_mbufs(m, txq, txsd, segs, &nsegs)) != 0)
1452 return (err);
1453 m0 = *m;
1454#endif
1455 ndesc = calc_tx_descs(m0, nsegs);
1456
1457 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1458 make_sgl(sgp, segs, nsegs);
1459
1460 sgl_flits = sgl_len(nsegs);
1461
1462 DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
1463 txq_prod(txq, ndesc, &txqs);
1464 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1465 wr_lo = htonl(V_WR_TID(txq->token));
1466 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
1467 check_ring_tx_db(pi->adapter, txq);
1468
1469 if ((m0->m_type == MT_DATA) &&
1470 ((m0->m_flags & (M_EXT|M_NOFREE)) == M_EXT) &&
1471 (m0->m_ext.ext_type != EXT_PACKET)) {
1472 m0->m_flags &= ~M_EXT ;
1473 cxgb_mbufs_outstanding--;
1474 m_free(m0);
1475 }
1476
1477 return (0);
1478}
1479
1480
1481/**
1482 * write_imm - write a packet into a Tx descriptor as immediate data
1483 * @d: the Tx descriptor to write
1484 * @m: the packet
1485 * @len: the length of packet data to write as immediate data
1486 * @gen: the generation bit value to write
1487 *
1488 * Writes a packet as immediate data into a Tx descriptor. The packet
1489 * contains a work request at its beginning. We must write the packet
1490 * carefully so the SGE doesn't accidentally read it before it has been
1491 * written in its entirety.
1492 */
1493static __inline void
1494write_imm(struct tx_desc *d, struct mbuf *m,
1495 unsigned int len, unsigned int gen)
1496{
1497 struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
1498 struct work_request_hdr *to = (struct work_request_hdr *)d;
1499
1500 if (len > WR_LEN)
1501 panic("len too big %d\n", len);
1502 if (len < sizeof(*from))
1503 panic("len too small %d", len);
1504
1505 memcpy(&to[1], &from[1], len - sizeof(*from));
1506 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1507 V_WR_BCNTLFLT(len & 7));
1508 wmb();
1509 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1510 V_WR_LEN((len + 7) / 8));
1511 wr_gen2(d, gen);
1512
1513 /*
1514 * This check is a hack; we should really fix the logic so
1515 * that this can't happen.
1516 */
1517 if (m->m_type != MT_DONTFREE)
1518 m_freem(m);
1519
1520}
1521
1522/**
1523 * check_desc_avail - check descriptor availability on a send queue
1524 * @adap: the adapter
1525 * @q: the TX queue
1526 * @m: the packet needing the descriptors
1527 * @ndesc: the number of Tx descriptors needed
1528 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1529 *
1530 * Checks if the requested number of Tx descriptors is available on an
1531 * SGE send queue. If the queue is already suspended or not enough
1532 * descriptors are available the packet is queued for later transmission.
1533 * Must be called with the Tx queue locked.
1534 *
1535 * Returns 0 if enough descriptors are available, 1 if there aren't
1536 * enough descriptors and the packet has been queued, and 2 if the caller
1537 * needs to retry because there weren't enough descriptors at the
1538 * beginning of the call but some freed up in the mean time.
1539 */
1540static __inline int
1541check_desc_avail(adapter_t *adap, struct sge_txq *q,
1542 struct mbuf *m, unsigned int ndesc,
1543 unsigned int qid)
1544{
1545 /*
1546 * XXX We currently only use this for checking the control queue;
1547 * the control queue is only used for binding qsets, which happens
1548 * at init time, so we are guaranteed enough descriptors.
1549 */
1550 if (__predict_false(!mbufq_empty(&q->sendq))) {
1551addq_exit: mbufq_tail(&q->sendq, m);
1552 return 1;
1553 }
1554 if (__predict_false(q->size - q->in_use < ndesc)) {
1555
1556 struct sge_qset *qs = txq_to_qset(q, qid);
1557
1558 printf("stopping q\n");
1559
1560 setbit(&qs->txq_stopped, qid);
1561 smp_mb();
1562
1563 if (should_restart_tx(q) &&
1564 test_and_clear_bit(qid, &qs->txq_stopped))
1565 return 2;
1566
1567 q->stops++;
1568 goto addq_exit;
1569 }
1570 return 0;
1571}
1572
1573
1574/**
1575 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1576 * @q: the SGE control Tx queue
1577 *
1578 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1579 * that send only immediate data (presently just the control queues) and
1580 * thus do not have any mbufs
1581 */
1582static __inline void
1583reclaim_completed_tx_imm(struct sge_txq *q)
1584{
1585 unsigned int reclaim = q->processed - q->cleaned;
1586
1587 mtx_assert(&q->lock, MA_OWNED);
1588
1589 q->in_use -= reclaim;
1590 q->cleaned += reclaim;
1591}
1592
1593static __inline int
1594immediate(const struct mbuf *m)
1595{
1596 return (m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN);
1597}
1598
1599/**
1600 * ctrl_xmit - send a packet through an SGE control Tx queue
1601 * @adap: the adapter
1602 * @q: the control queue
1603 * @m: the packet
1604 *
1605 * Send a packet through an SGE control Tx queue. Packets sent through
1606 * a control queue must fit entirely as immediate data in a single Tx
1607 * descriptor and have no page fragments.
1608 */
1609static int
1610ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1611{
1612 int ret;
1613 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1614
1615 if (__predict_false(!immediate(m))) {
1616 m_freem(m);
1617 return 0;
1618 }
1619
1620 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1621 wrp->wr_lo = htonl(V_WR_TID(q->token));
1622
1623 mtx_lock(&q->lock);
1624again: reclaim_completed_tx_imm(q);
1625
1626 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1627 if (__predict_false(ret)) {
1628 if (ret == 1) {
1629 mtx_unlock(&q->lock);
1630 log(LOG_ERR, "no desc available\n");
1631 return (ENOSPC);
1632 }
1633 goto again;
1634 }
1635 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1636
1637 q->in_use++;
1638 if (++q->pidx >= q->size) {
1639 q->pidx = 0;
1640 q->gen ^= 1;
1641 }
1642 mtx_unlock(&q->lock);
1643 wmb();
1644 t3_write_reg(adap, A_SG_KDOORBELL,
1645 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1646 return (0);
1647}
1648
1649
1650/**
1651 * restart_ctrlq - restart a suspended control queue
1652 * @qs: the queue set containing the control queue
1653 *
1654 * Resumes transmission on a suspended Tx control queue.
1655 */
1656static void
1657restart_ctrlq(void *data, int npending)
1658{
1659 struct mbuf *m;
1660 struct sge_qset *qs = (struct sge_qset *)data;
1661 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1662 adapter_t *adap = qs->port->adapter;
1663
1664 log(LOG_WARNING, "Restart_ctrlq in_use=%d\n", q->in_use);
1665
1666 mtx_lock(&q->lock);
1667again: reclaim_completed_tx_imm(q);
1668
1669 while (q->in_use < q->size &&
1670 (m = mbufq_dequeue(&q->sendq)) != NULL) {
1671
1672 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1673
1674 if (++q->pidx >= q->size) {
1675 q->pidx = 0;
1676 q->gen ^= 1;
1677 }
1678 q->in_use++;
1679 }
1680 if (!mbufq_empty(&q->sendq)) {
1681 setbit(&qs->txq_stopped, TXQ_CTRL);
1682 smp_mb();
1683
1684 if (should_restart_tx(q) &&
1685 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1686 goto again;
1687 q->stops++;
1688 }
1689 mtx_unlock(&q->lock);
1690 wmb();
1691 t3_write_reg(adap, A_SG_KDOORBELL,
1692 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1693}
1694
1695
1696/*
1697 * Send a management message through control queue 0
1698 */
1699int
1700t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1701{
1702 return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
1703}
1704
1705
1706/**
1707 * free_qset - free the resources of an SGE queue set
1708 * @sc: the controller owning the queue set
1709 * @q: the queue set
1710 *
1711 * Release the HW and SW resources associated with an SGE queue set, such
1712 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
1713 * queue set must be quiesced prior to calling this.
1714 */
1715void
1716t3_free_qset(adapter_t *sc, struct sge_qset *q)
1717{
1718 int i;
1719
1720 t3_free_tx_desc_all(&q->txq[TXQ_ETH]);
1721
1722 for (i = 0; i < SGE_TXQ_PER_SET; i++)
1723 if (q->txq[i].txq_mr.br_ring != NULL) {
1724 free(q->txq[i].txq_mr.br_ring, M_DEVBUF);
1725 mtx_destroy(&q->txq[i].txq_mr.br_lock);
1726 }
1727 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
1728 if (q->fl[i].desc) {
1729 mtx_lock_spin(&sc->sge.reg_lock);
1730 t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
1731 mtx_unlock_spin(&sc->sge.reg_lock);
1732 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
1733 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
1734 q->fl[i].desc_map);
1735 bus_dma_tag_destroy(q->fl[i].desc_tag);
1736 bus_dma_tag_destroy(q->fl[i].entry_tag);
1737 }
1738 if (q->fl[i].sdesc) {
1739 free_rx_bufs(sc, &q->fl[i]);
1740 free(q->fl[i].sdesc, M_DEVBUF);
1741 }
1742 }
1743
1744 for (i = 0; i < SGE_TXQ_PER_SET; i++) {
1745 if (q->txq[i].desc) {
1746 mtx_lock_spin(&sc->sge.reg_lock);
1747 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
1748 mtx_unlock_spin(&sc->sge.reg_lock);
1749 bus_dmamap_unload(q->txq[i].desc_tag,
1750 q->txq[i].desc_map);
1751 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
1752 q->txq[i].desc_map);
1753 bus_dma_tag_destroy(q->txq[i].desc_tag);
1754 bus_dma_tag_destroy(q->txq[i].entry_tag);
1755 MTX_DESTROY(&q->txq[i].lock);
1756 }
1757 if (q->txq[i].sdesc) {
1758 free(q->txq[i].sdesc, M_DEVBUF);
1759 }
1760 }
1761
1762 if (q->rspq.desc) {
1763 mtx_lock_spin(&sc->sge.reg_lock);
1764 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
1765 mtx_unlock_spin(&sc->sge.reg_lock);
1766
1767 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
1768 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
1769 q->rspq.desc_map);
1770 bus_dma_tag_destroy(q->rspq.desc_tag);
1771 MTX_DESTROY(&q->rspq.lock);
1772 }
1773
1774 tcp_lro_free(&q->lro.ctrl);
1775
1776 bzero(q, sizeof(*q));
1777}
1778
1779/**
1780 * t3_free_sge_resources - free SGE resources
1781 * @sc: the adapter softc
1782 *
1783 * Frees resources used by the SGE queue sets.
1784 */
1785void
1786t3_free_sge_resources(adapter_t *sc)
1787{
1788 int i, nqsets;
1789
1790#ifdef IFNET_MULTIQUEUE
1791 panic("%s should not be called when IFNET_MULTIQUEUE is defined", __FUNCTION__);
1792#endif
1793 for (nqsets = i = 0; i < (sc)->params.nports; i++)
1794 nqsets += sc->port[i].nqsets;
1795
1796 for (i = 0; i < nqsets; ++i)
1797 t3_free_qset(sc, &sc->sge.qs[i]);
1798}
1799
1800/**
1801 * t3_sge_start - enable SGE
1802 * @sc: the controller softc
1803 *
1804 * Enables the SGE for DMAs. This is the last step in starting packet
1805 * transfers.
1806 */
1807void
1808t3_sge_start(adapter_t *sc)
1809{
1810 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
1811}
1812
1813/**
1814 * t3_sge_stop - disable SGE operation
1815 * @sc: the adapter
1816 *
1817 * Disables the DMA engine. This can be called in emergencies (e.g.,
1818 * from error interrupts) or from normal process context. In the latter
1819 * case it also disables any pending queue restart tasklets. Note that
1820 * if it is called in interrupt context it cannot disable the restart
1821 * tasklets as it cannot wait; however, the tasklets will have no effect
1822 * since the doorbells are disabled and the driver will call this again
1823 * later from process context, at which time the tasklets will be stopped
1824 * if they are still running.
1825 */
1826void
1827t3_sge_stop(adapter_t *sc)
1828{
1829 int i, nqsets;
1830
1831 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
1832
1833 if (sc->tq == NULL)
1834 return;
1835
1836 for (nqsets = i = 0; i < (sc)->params.nports; i++)
1837 nqsets += sc->port[i].nqsets;
1838#ifdef notyet
1839 /*
1840 *
1841 * XXX
1842 */
1843 for (i = 0; i < nqsets; ++i) {
1844 struct sge_qset *qs = &sc->sge.qs[i];
1845
1846 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
1847 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
1848 }
1849#endif
1850}
1851
1852/**
1853 * t3_free_tx_desc - reclaims Tx descriptors and their buffers
1854 * @q: the Tx queue to reclaim descriptors from
1855 * @reclaimable: the number of descriptors to reclaim
1859 *
1860 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
1861 * Tx buffers. Called with the Tx queue lock held.
1864 */
1865void
1866t3_free_tx_desc(struct sge_txq *q, int reclaimable)
1867{
1868 struct tx_sw_desc *txsd;
1869 unsigned int cidx;
1870
1871#ifdef T3_TRACE
1872 T3_TRACE2(sc->tb[q->cntxt_id & 7],
1873 "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
1874#endif
1875 cidx = q->cidx;
1876 txsd = &q->sdesc[cidx];
1877 DPRINTF("reclaiming %d WR\n", reclaimable);
1878 mtx_assert(&q->lock, MA_OWNED);
1879 while (reclaimable--) {
1880 DPRINTF("cidx=%d d=%p\n", cidx, txsd);
1881 if (txsd->mi.mi_base != NULL) {
1882 if (txsd->flags & TX_SW_DESC_MAPPED) {
1883 bus_dmamap_unload(q->entry_tag, txsd->map);
1884 txsd->flags &= ~TX_SW_DESC_MAPPED;
1885 }
1886 m_freem_iovec(&txsd->mi);
1887 buf_ring_scan(&q->txq_mr, txsd->mi.mi_base, __FILE__, __LINE__);
1888 txsd->mi.mi_base = NULL;
1889
1890#if defined(DIAGNOSTIC) && 0
1891 if (m_get_priority(txsd->m[0]) != cidx)
1892 printf("pri=%d cidx=%d\n",
1893 (int)m_get_priority(txsd->m[0]), cidx);
1894#endif
1895
1896 } else
1897 q->txq_skipped++;
1898
1899 ++txsd;
1900 if (++cidx == q->size) {
1901 cidx = 0;
1902 txsd = q->sdesc;
1903 }
1904 }
1905 q->cidx = cidx;
1906
1907}
1908
1909void
1910t3_free_tx_desc_all(struct sge_txq *q)
1911{
1912 int i;
1913 struct tx_sw_desc *txsd;
1914
1915 for (i = 0; i < q->size; i++) {
1916 txsd = &q->sdesc[i];
1917 if (txsd->mi.mi_base != NULL) {
1918 if (txsd->flags & TX_SW_DESC_MAPPED) {
1919 bus_dmamap_unload(q->entry_tag, txsd->map);
1920 txsd->flags &= ~TX_SW_DESC_MAPPED;
1921 }
1922 m_freem_iovec(&txsd->mi);
1923 bzero(&txsd->mi, sizeof(txsd->mi));
1924 }
1925 }
1926}
1927
1928/**
1929 * is_new_response - check if a response is newly written
1930 * @r: the response descriptor
1931 * @q: the response queue
1932 *
1933 * Returns true if a response descriptor contains a yet unprocessed
1934 * response.
1935 */
1936static __inline int
1937is_new_response(const struct rsp_desc *r,
1938 const struct sge_rspq *q)
1939{
1940 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
1941}
1942
1943#define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1944#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1945 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
1946 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
1947 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
1948
1949/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
1950#define NOMEM_INTR_DELAY 2500
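/*
 * For reference: 2500 * 0.1us works out to a 250us holdoff while the
 * system is short of memory.
 */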
1951
1952/**
1953 * write_ofld_wr - write an offload work request
1954 * @adap: the adapter
1955 * @m: the packet to send
1956 * @q: the Tx queue
1957 * @pidx: index of the first Tx descriptor to write
1958 * @gen: the generation value to use
1959 *	@ndesc: number of descriptors the packet will occupy
 *	@segs: the DMA segments of the packet data
 *	@nsegs: the number of DMA segments
1960 *
1961 * Write an offload work request to send the supplied packet. The packet
1962 * data already carry the work request with most fields populated.
1963 */
1964static void
1965write_ofld_wr(adapter_t *adap, struct mbuf *m,
1966 struct sge_txq *q, unsigned int pidx,
1967 unsigned int gen, unsigned int ndesc,
1968 bus_dma_segment_t *segs, unsigned int nsegs)
1969{
1970 unsigned int sgl_flits, flits;
1971 struct work_request_hdr *from;
1972 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
1973 struct tx_desc *d = &q->desc[pidx];
1974 struct txq_state txqs;
1975
1976 if (immediate(m) && nsegs == 0) {
1977 write_imm(d, m, m->m_len, gen);
1978 return;
1979 }
1980
1981 /* Only TX_DATA builds SGLs */
1982 from = mtod(m, struct work_request_hdr *);
1983 memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
1984
1985 flits = m->m_len / 8;
1986 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
1987
1988 make_sgl(sgp, segs, nsegs);
1989 sgl_flits = sgl_len(nsegs);
1990
1991 txqs.gen = gen;
1992 txqs.pidx = pidx;
1993 txqs.compl = 0;
1994
1995 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
1996 from->wr_hi, from->wr_lo);
1997}
1998
1999/**
2000 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
2001 *	@m: the packet
 *	@nsegs: the number of DMA segments backing the packet
2002 *
2003 * Returns the number of Tx descriptors needed for the given offload
2004 * packet. These packets are already fully constructed.
2005 */
2006static __inline unsigned int
2007calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
2008{
2009 unsigned int flits, cnt = 0;
2010 int ndescs;
2011
2012 if (m->m_len <= WR_LEN && nsegs == 0)
2013 return (1); /* packet fits as immediate data */
2014
2015 if (m->m_flags & M_IOVEC)
2016 cnt = mtomv(m)->mv_count;
2017 else
2018 cnt = nsegs;
2019
2020 /* headers */
2021 flits = m->m_len / 8;
2022
2023 ndescs = flits_to_desc(flits + sgl_len(cnt));
2024
2025 CTR4(KTR_CXGB, "flits=%d sgl_len=%d nsegs=%d ndescs=%d",
2026 flits, sgl_len(cnt), nsegs, ndescs);
2027
2028 return (ndescs);
2029}
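/*
 * In other words, the descriptor count is the header flits (m->m_len / 8)
 * plus the flits needed for a scatter-gather list covering 'cnt' segments,
 * rounded up to whole Tx descriptors by flits_to_desc().
 */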
2030
2031/**
2032 * ofld_xmit - send a packet through an offload queue
2033 * @adap: the adapter
2034 * @q: the Tx offload queue
2035 * @m: the packet
2036 *
2037 * Send an offload packet through an SGE offload queue.
2038 */
2039static int
2040ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
2041{
2042 int ret, nsegs;
2043 unsigned int ndesc;
2044 unsigned int pidx, gen;
2045 bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
2046 struct tx_sw_desc *stx;
2047
2048 nsegs = m_get_sgllen(m);
2049 vsegs = m_get_sgl(m);
2050 ndesc = calc_tx_descs_ofld(m, nsegs);
2051 busdma_map_sgl(vsegs, segs, nsegs);
2052
2053 stx = &q->sdesc[q->pidx];
2054 KASSERT(stx->mi.mi_base == NULL, ("mi_base set"));
2055
2056 mtx_lock(&q->lock);
2057again: reclaim_completed_tx_(q, 16);
2058 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
2059 if (__predict_false(ret)) {
2060 if (ret == 1) {
2061 printf("no ofld desc avail\n");
2062
2063 m_set_priority(m, ndesc); /* save for restart */
2064 mtx_unlock(&q->lock);
2065 return (EINTR);
2066 }
2067 goto again;
2068 }
2069
2070 gen = q->gen;
2071 q->in_use += ndesc;
2072 pidx = q->pidx;
2073 q->pidx += ndesc;
2074 if (q->pidx >= q->size) {
2075 q->pidx -= q->size;
2076 q->gen ^= 1;
2077 }
2078#ifdef T3_TRACE
2079 T3_TRACE5(adap->tb[q->cntxt_id & 7],
2080 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
2081 ndesc, pidx, skb->len, skb->len - skb->data_len,
2082 skb_shinfo(skb)->nr_frags);
2083#endif
2084 mtx_unlock(&q->lock);
2085
2086 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2087 check_ring_tx_db(adap, q);
2088 return (0);
2089}
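/*
 * Note the EINTR path above: the descriptor count is stashed in the mbuf
 * priority ("save for restart") and recovered by restart_offloadq() via
 * m_get_priority() once the suspended queue is resumed.  The mbuf itself
 * is presumably left on q->sendq by check_desc_avail(), which is defined
 * elsewhere.
 */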
2090
2091/**
2092 * restart_offloadq - restart a suspended offload queue
2093 *	@qs: the queue set containing the offload queue
2094 *
2095 * Resumes transmission on a suspended Tx offload queue.
2096 */
2097static void
2098restart_offloadq(void *data, int npending)
2099{
2100 struct mbuf *m;
2101 struct sge_qset *qs = data;
2102 struct sge_txq *q = &qs->txq[TXQ_OFLD];
2103 adapter_t *adap = qs->port->adapter;
2104 bus_dma_segment_t segs[TX_MAX_SEGS];
2105 struct tx_sw_desc *stx = &q->sdesc[q->pidx];
2106 int nsegs, cleaned;
2107
2108 mtx_lock(&q->lock);
2109again: cleaned = reclaim_completed_tx_(q, 16);
2110
2111 while ((m = mbufq_peek(&q->sendq)) != NULL) {
2112 unsigned int gen, pidx;
2113 unsigned int ndesc = m_get_priority(m);
2114
2115 if (__predict_false(q->size - q->in_use < ndesc)) {
2116 setbit(&qs->txq_stopped, TXQ_OFLD);
2117 smp_mb();
2118
2119 if (should_restart_tx(q) &&
2120 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
2121 goto again;
2122 q->stops++;
2123 break;
2124 }
2125
2126 gen = q->gen;
2127 q->in_use += ndesc;
2128 pidx = q->pidx;
2129 q->pidx += ndesc;
2130 if (q->pidx >= q->size) {
2131 q->pidx -= q->size;
2132 q->gen ^= 1;
2133 }
2134
2135 (void)mbufq_dequeue(&q->sendq);
2136 busdma_map_mbufs(&m, q, stx, segs, &nsegs);
2137 mtx_unlock(&q->lock);
2138 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2139 mtx_lock(&q->lock);
2140 }
2141 mtx_unlock(&q->lock);
2142
2143#if USE_GTS
2144 set_bit(TXQ_RUNNING, &q->flags);
2145 set_bit(TXQ_LAST_PKT_DB, &q->flags);
2146#endif
2147 wmb();
2148 t3_write_reg(adap, A_SG_KDOORBELL,
2149 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2150}
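/*
 * The A_SG_KDOORBELL write above rings the egress doorbell for this
 * context (F_SELEGRCNTX | V_EGRCNTX(cntxt_id)) so the SGE starts fetching
 * the newly written descriptors.
 */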
2151
2152/**
2153 * queue_set - return the queue set a packet should use
2154 * @m: the packet
2155 *
2156 * Maps a packet to the SGE queue set it should use. The desired queue
2157 *	set is carried in bits 1-3 of the packet's priority.
2158 */
2159static __inline int
2160queue_set(const struct mbuf *m)
2161{
2162 return m_get_priority(m) >> 1;
2163}
2164
2165/**
2166 * is_ctrl_pkt - return whether an offload packet is a control packet
2167 * @m: the packet
2168 *
2169 * Determines whether an offload packet should use an OFLD or a CTRL
2170 * Tx queue. This is indicated by bit 0 in the packet's priority.
2171 */
2172static __inline int
2173is_ctrl_pkt(const struct mbuf *m)
2174{
2175 return m_get_priority(m) & 1;
2176}
2177
2178/**
2179 * t3_offload_tx - send an offload packet
2180 * @tdev: the offload device to send to
2181 * @m: the packet
2182 *
2183 * Sends an offload packet. We use the packet priority to select the
2184 * appropriate Tx queue as follows: bit 0 indicates whether the packet
2185 * should be sent as regular or control, bits 1-3 select the queue set.
2186 */
2187int
2188t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
2189{
2190 adapter_t *adap = tdev2adap(tdev);
2191 struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
2192
2193 if (__predict_false(is_ctrl_pkt(m)))
2194 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m);
2195
2196 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m);
2197}
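/*
 * Worked example of the priority encoding: a priority value of 5 (binary
 * 101) has bit 0 set, so is_ctrl_pkt() sends it to the control queue, and
 * queue_set() = 5 >> 1 = 2 selects queue set 2.
 */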
2198
2199/**
2200 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
2201 * @tdev: the offload device that will be receiving the packets
2202 * @q: the SGE response queue that assembled the bundle
2203 * @m: the partial bundle
2204 * @n: the number of packets in the bundle
2205 *
2206 * Delivers a (partial) bundle of Rx offload packets to an offload device.
2207 */
2208static __inline void
2209deliver_partial_bundle(struct t3cdev *tdev,
2210 struct sge_rspq *q,
2211 struct mbuf *mbufs[], int n)
2212{
2213 if (n) {
2214 q->offload_bundles++;
2215 cxgb_ofld_recv(tdev, mbufs, n);
2216 }
2217}
2218
2219static __inline int
2220rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
2221 struct mbuf *m, struct mbuf *rx_gather[],
2222 unsigned int gather_idx)
2223{
2224
2225 rq->offload_pkts++;
2226 m->m_pkthdr.header = mtod(m, void *);
2227 rx_gather[gather_idx++] = m;
2228 if (gather_idx == RX_BUNDLE_SIZE) {
2229 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
2230 gather_idx = 0;
2231 rq->offload_bundles++;
2232 }
2233 return (gather_idx);
2234}
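/*
 * rx_offload() gathers offload packets into rx_gather[] and hands them to
 * cxgb_ofld_recv() in batches of RX_BUNDLE_SIZE; deliver_partial_bundle()
 * above flushes whatever remains at the end of a processing pass.
 */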
2235
2236static void
2237restart_tx(struct sge_qset *qs)
2238{
2239 struct adapter *sc = qs->port->adapter;
2240
2241
2242 if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2243 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2244 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2245 qs->txq[TXQ_OFLD].restarts++;
2246 DPRINTF("restarting TXQ_OFLD\n");
2247 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2248 }
2249 DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
2250 qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
2251 qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
2252 qs->txq[TXQ_CTRL].in_use);
2253
2254 if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2255 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2256 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2257 qs->txq[TXQ_CTRL].restarts++;
2258 DPRINTF("restarting TXQ_CTRL\n");
2259 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2260 }
2261}
2262
2263/**
2264 * t3_sge_alloc_qset - initialize an SGE queue set
2265 * @sc: the controller softc
2266 * @id: the queue set id
2267 * @nports: how many Ethernet ports will be using this queue set
2268 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2269 * @p: configuration parameters for this queue set
2270 * @ntxq: number of Tx queues for the queue set
2271 * @pi: port info for queue set
2272 *
2273 * Allocate resources and initialize an SGE queue set. A queue set
2274 * comprises a response queue, two Rx free-buffer queues, and up to 3
2275 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2276 * queue, offload queue, and control queue.
2277 */
2278int
2279t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2280 const struct qset_params *p, int ntxq, struct port_info *pi)
2281{
2282 struct sge_qset *q = &sc->sge.qs[id];
2283 int i, header_size, ret = 0;
2284
2285 for (i = 0; i < SGE_TXQ_PER_SET; i++) {
2286 if ((q->txq[i].txq_mr.br_ring = malloc(cxgb_txq_buf_ring_size*sizeof(struct mbuf *),
2287 M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) {
2288 device_printf(sc->dev, "failed to allocate mbuf ring\n");
2289 goto err;
2290 }
2291 q->txq[i].txq_mr.br_prod = q->txq[i].txq_mr.br_cons = 0;
2292 q->txq[i].txq_mr.br_size = cxgb_txq_buf_ring_size;
2293 mtx_init(&q->txq[i].txq_mr.br_lock, "txq mbuf ring", NULL, MTX_DEF);
2294 }
2295
2296 init_qset_cntxt(q, id);
2297 q->idx = id;
2298
2299 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2300 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2301 &q->fl[0].desc, &q->fl[0].sdesc,
2302 &q->fl[0].desc_tag, &q->fl[0].desc_map,
2303 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2304 printf("error %d from alloc ring fl0\n", ret);
2305 goto err;
2306 }
2307
2308 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2309 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2310 &q->fl[1].desc, &q->fl[1].sdesc,
2311 &q->fl[1].desc_tag, &q->fl[1].desc_map,
2312 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2313 printf("error %d from alloc ring fl1\n", ret);
2314 goto err;
2315 }
2316
2317 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2318 &q->rspq.phys_addr, &q->rspq.desc, NULL,
2319 &q->rspq.desc_tag, &q->rspq.desc_map,
2320 NULL, NULL)) != 0) {
2321 printf("error %d from alloc ring rspq\n", ret);
2322 goto err;
2323 }
2324
2325 for (i = 0; i < ntxq; ++i) {
2326 /*
2327 * The control queue always uses immediate data so does not
2328 * need to keep track of any mbufs.
2329 * XXX Placeholder for future TOE support.
2330 */
2331 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2332
2333 if ((ret = alloc_ring(sc, p->txq_size[i],
2334 sizeof(struct tx_desc), sz,
2335 &q->txq[i].phys_addr, &q->txq[i].desc,
2336 &q->txq[i].sdesc, &q->txq[i].desc_tag,
2337 &q->txq[i].desc_map,
2338 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2339 printf("error %d from alloc ring tx %i\n", ret, i);
2340 goto err;
2341 }
2342 mbufq_init(&q->txq[i].sendq);
2343 q->txq[i].gen = 1;
2344 q->txq[i].size = p->txq_size[i];
2345 snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d",
2346 device_get_unit(sc->dev), irq_vec_idx, i);
2347 MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF);
2348 }
2349
2350 q->txq[TXQ_ETH].port = pi;
2351
2352 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2353 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2354 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]);
2355 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]);
2356
2357 q->fl[0].gen = q->fl[1].gen = 1;
2358 q->fl[0].size = p->fl_size;
2359 q->fl[1].size = p->jumbo_size;
2360
2361 q->rspq.gen = 1;
2362 q->rspq.cidx = 0;
2363 q->rspq.size = p->rspq_size;
2364
2365
2366 header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
2367 q->txq[TXQ_ETH].stop_thres = nports *
2368 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2369
2370 q->fl[0].buf_size = (MCLBYTES - header_size);
2371 q->fl[0].zone = zone_clust;
2372 q->fl[0].type = EXT_CLUSTER;
2373#if __FreeBSD_version > 800000
2374 if (cxgb_use_16k_clusters) {
2375 q->fl[1].buf_size = MJUM16BYTES - header_size;
2376 q->fl[1].zone = zone_jumbo16;
2377 q->fl[1].type = EXT_JUMBO16;
2378 } else {
2379 q->fl[1].buf_size = MJUM9BYTES - header_size;
2380 q->fl[1].zone = zone_jumbo9;
2381 q->fl[1].type = EXT_JUMBO9;
2382 }
2383#else
2384 q->fl[1].buf_size = MJUMPAGESIZE - header_size;
2385 q->fl[1].zone = zone_jumbop;
2386 q->fl[1].type = EXT_JUMBOP;
2387#endif
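	/*
	 * Free list 0 is backed by standard MCLBYTES clusters; free list 1
	 * uses 16KB or 9KB jumbo clusters (selected by cxgb_use_16k_clusters)
	 * on __FreeBSD_version > 800000 and page-sized jumbo clusters
	 * otherwise.  In every case header_size bytes are reserved at the
	 * front of the cluster for the embedded mbuf header and refcount.
	 */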
2388
2389 /*
2390	 * We allocate and set up the lro_ctrl structure regardless of whether
2391	 * LRO is available and/or enabled.
2392 */
2393 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
2394 ret = tcp_lro_init(&q->lro.ctrl);
2395 if (ret) {
2396 printf("error %d from tcp_lro_init\n", ret);
2397 goto err;
2398 }
2399 q->lro.ctrl.ifp = pi->ifp;
2400
2401 mtx_lock_spin(&sc->sge.reg_lock);
2402 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2403 q->rspq.phys_addr, q->rspq.size,
2404 q->fl[0].buf_size, 1, 0);
2405 if (ret) {
2406 printf("error %d from t3_sge_init_rspcntxt\n", ret);
2407 goto err_unlock;
2408 }
2409
2410 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2411 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2412 q->fl[i].phys_addr, q->fl[i].size,
2413 q->fl[i].buf_size, p->cong_thres, 1,
2414 0);
2415 if (ret) {
2416 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2417 goto err_unlock;
2418 }
2419 }
2420
2421 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2422 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2423 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2424 1, 0);
2425 if (ret) {
2426 printf("error %d from t3_sge_init_ecntxt\n", ret);
2427 goto err_unlock;
2428 }
2429
2430 if (ntxq > 1) {
2431 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2432 USE_GTS, SGE_CNTXT_OFLD, id,
2433 q->txq[TXQ_OFLD].phys_addr,
2434 q->txq[TXQ_OFLD].size, 0, 1, 0);
2435 if (ret) {
2436 printf("error %d from t3_sge_init_ecntxt\n", ret);
2437 goto err_unlock;
2438 }
2439 }
2440
2441 if (ntxq > 2) {
2442 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2443 SGE_CNTXT_CTRL, id,
2444 q->txq[TXQ_CTRL].phys_addr,
2445 q->txq[TXQ_CTRL].size,
2446 q->txq[TXQ_CTRL].token, 1, 0);
2447 if (ret) {
2448 printf("error %d from t3_sge_init_ecntxt\n", ret);
2449 goto err_unlock;
2450 }
2451 }
2452
2453 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2454 device_get_unit(sc->dev), irq_vec_idx);
2455 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2456
2457 mtx_unlock_spin(&sc->sge.reg_lock);
2458 t3_update_qset_coalesce(q, p);
2459 q->port = pi;
2460
2461 refill_fl(sc, &q->fl[0], q->fl[0].size);
2462 refill_fl(sc, &q->fl[1], q->fl[1].size);
2463 refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2464
2465 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2466 V_NEWTIMER(q->rspq.holdoff_tmr));
2467
2468 return (0);
2469
2470err_unlock:
2471 mtx_unlock_spin(&sc->sge.reg_lock);
2472err:
2473 t3_free_qset(sc, q);
2474
2475 return (ret);
2476}
2477
2478/*
2479 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
2480 * ethernet data. Hardware assistance with various checksums and any vlan tag
2481 * will also be taken into account here.
2482 */
2483void
2484t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
2485{
2486 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2487 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2488 struct ifnet *ifp = pi->ifp;
2489
2490 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
2491
2492 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
2493 cpl->csum_valid && cpl->csum == 0xffff) {
2495		rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2496		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
2497 m->m_pkthdr.csum_data = 0xffff;
2498 }
2499 /*
2500 * XXX need to add VLAN support for 6.x
2501 */
2502#ifdef VLAN_SUPPORTED
2503 if (__predict_false(cpl->vlan_valid)) {
2504 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2505 m->m_flags |= M_VLANTAG;
2506 }
2507#endif
2508
2509 m->m_pkthdr.rcvif = ifp;
2510 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
2511#ifndef DISABLE_MBUF_IOVEC
2512 m_explode(m);
2513#endif
2514 /*
2515 * adjust after conversion to mbuf chain
2516 */
2517 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
2518 m->m_len -= (sizeof(*cpl) + ethpad);
2519 m->m_data += (sizeof(*cpl) + ethpad);
2520}
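/*
 * After t3_rx_eth() returns, m_data has been advanced past the CPL_RX_PKT
 * header plus ethpad (2 in process_responses()), so the packet now begins
 * at the Ethernet header; the pad presumably keeps the IP header 32-bit
 * aligned.
 */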
2521
2522static void
2523ext_free_handler(void *arg1, void * arg2)
2524{
2525 uintptr_t type = (uintptr_t)arg2;
2526 uma_zone_t zone;
2527 struct mbuf *m;
2528
2529 m = arg1;
2530 zone = m_getzonefromtype(type);
2531 m->m_ext.ext_type = (int)type;
2532 cxgb_ext_freed++;
2533 cxgb_cache_put(zone, m);
2534}
2535
2536static void
2537init_cluster_mbuf(caddr_t cl, int flags, int type, uma_zone_t zone)
2538{
2539 struct mbuf *m;
2540 int header_size;
2541
2542 header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) +
2543 sizeof(struct m_ext_) + sizeof(uint32_t);
2544
2545 bzero(cl, header_size);
2546 m = (struct mbuf *)cl;
2547
2548 cxgb_ext_inited++;
2549 SLIST_INIT(&m->m_pkthdr.tags);
2550 m->m_type = MT_DATA;
2551 m->m_flags = flags | M_NOFREE | M_EXT;
2552 m->m_data = cl + header_size;
2553 m->m_ext.ext_buf = cl;
2554 m->m_ext.ref_cnt = (uint32_t *)(cl + header_size - sizeof(uint32_t));
2555 m->m_ext.ext_size = m_getsizefromtype(type);
2556 m->m_ext.ext_free = ext_free_handler;
2557 m->m_ext.ext_arg1 = cl;
2558 m->m_ext.ext_arg2 = (void *)(uintptr_t)type;
2559 m->m_ext.ext_type = EXT_EXTREF;
2560 *(m->m_ext.ref_cnt) = 1;
2561 DPRINTF("data=%p ref_cnt=%p\n", m->m_data, m->m_ext.ref_cnt);
2562}
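/*
 * Resulting layout of a cluster initialized by init_cluster_mbuf():
 *
 *   cl                                        cl + header_size
 *   | m_hdr | pkthdr | m_ext_ | ref_cnt (4B) | packet data ...
 *
 * The reference count occupies the last four bytes of the reserved header
 * area and m_data points just past it.
 */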
2563
2564
2565/**
2566 * get_packet - return the next ingress packet buffer from a free list
2567 * @adap: the adapter that received the packet
2568 * @drop_thres: # of remaining buffers before we start dropping packets
2569 * @qs: the qset that the SGE free list holding the packet belongs to
2570 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
2571 * @r: response descriptor
2572 *
2573 *	Get the next packet from a free list and complete setup of the
2574 *	mbuf.  If the packet is small we make a copy and recycle the
2575 * original buffer, otherwise we use the original buffer itself. If a
2576 * positive drop threshold is supplied packets are dropped and their
2577 * buffers recycled if (a) the number of remaining buffers is under the
2578 * threshold and the packet is too big to copy, or (b) the packet should
2579 * be copied but there is no memory for the copy.
2580 */
2581#ifdef DISABLE_MBUF_IOVEC
2582
2583static int
2584get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2585 struct t3_mbuf_hdr *mh, struct rsp_desc *r)
2586{
2587
2588 unsigned int len_cq = ntohl(r->len_cq);
2589 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2590 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2591 uint32_t len = G_RSPD_LEN(len_cq);
2592 uint32_t flags = ntohl(r->flags);
2593 uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2594 caddr_t cl;
2595 struct mbuf *m, *m0;
2596 int ret = 0;
2597
2598 prefetch(sd->rxsd_cl);
2599
2600 fl->credits--;
2601 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2602
2603 if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2604 if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2605 goto skip_recycle;
2606 cl = mtod(m0, void *);
2607 memcpy(cl, sd->data, len);
2608 recycle_rx_buf(adap, fl, fl->cidx);
2609 m = m0;
2610 m0->m_len = len;
2611 } else {
2612 skip_recycle:
2613
2614 bus_dmamap_unload(fl->entry_tag, sd->map);
2615 cl = sd->rxsd_cl;
2616 m = m0 = (struct mbuf *)cl;
2617
2618 if ((sopeop == RSPQ_SOP_EOP) ||
2619 (sopeop == RSPQ_SOP))
2620 flags = M_PKTHDR;
2621 init_cluster_mbuf(cl, flags, fl->type, fl->zone);
2622 m0->m_len = len;
2623 }
2624 switch(sopeop) {
2625 case RSPQ_SOP_EOP:
2626 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2627 mh->mh_head = mh->mh_tail = m;
2628 m->m_pkthdr.len = len;
2629 ret = 1;
2630 break;
2631 case RSPQ_NSOP_NEOP:
2632 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2633 if (mh->mh_tail == NULL) {
2634 log(LOG_ERR, "discarding intermediate descriptor entry\n");
2635 m_freem(m);
2636 break;
2637 }
2638 mh->mh_tail->m_next = m;
2639 mh->mh_tail = m;
2640 mh->mh_head->m_pkthdr.len += len;
2641 ret = 0;
2642 break;
2643 case RSPQ_SOP:
2644 DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2645 m->m_pkthdr.len = len;
2646 mh->mh_head = mh->mh_tail = m;
2647 ret = 0;
2648 break;
2649 case RSPQ_EOP:
2650 DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2651 mh->mh_head->m_pkthdr.len += len;
2652 mh->mh_tail->m_next = m;
2653 mh->mh_tail = m;
2654 ret = 1;
2655 break;
2656 }
2657 if (++fl->cidx == fl->size)
2658 fl->cidx = 0;
2659
2660 return (ret);
2661}
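/*
 * The switch above is a small SOP/EOP state machine: RSPQ_SOP starts a new
 * chain in *mh, RSPQ_NSOP_NEOP and RSPQ_EOP append to it, and the function
 * returns 1 only once EOP has been seen and a complete packet is ready.
 */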
2662
2663#else
2664
2665static int
2666get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2667 struct mbuf **m, struct rsp_desc *r)
2668{
2669
2670 unsigned int len_cq = ntohl(r->len_cq);
2671 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2672 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2673 uint32_t len = G_RSPD_LEN(len_cq);
2674 uint32_t flags = ntohl(r->flags);
2675 uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2676 void *cl;
2677 int ret = 0;
2678 struct mbuf *m0;
2679#if 0
2680 if ((sd + 1 )->rxsd_cl)
2681 prefetch((sd + 1)->rxsd_cl);
2682 if ((sd + 2)->rxsd_cl)
2683 prefetch((sd + 2)->rxsd_cl);
2684#endif
2685 DPRINTF("rx cpu=%d\n", curcpu);
2686 fl->credits--;
2687 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2688
2689 if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2690 if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2691 goto skip_recycle;
2692 cl = mtod(m0, void *);
2693 memcpy(cl, sd->data, len);
2694 recycle_rx_buf(adap, fl, fl->cidx);
2695 *m = m0;
2696 } else {
2697 skip_recycle:
2698 bus_dmamap_unload(fl->entry_tag, sd->map);
2699 cl = sd->rxsd_cl;
2700 *m = m0 = (struct mbuf *)cl;
2701 }
2702
2703 switch(sopeop) {
2704 case RSPQ_SOP_EOP:
2705 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2706 if (cl == sd->rxsd_cl)
2707 init_cluster_mbuf(cl, M_PKTHDR, fl->type, fl->zone);
2708 m0->m_len = m0->m_pkthdr.len = len;
2709 ret = 1;
2710 goto done;
2711 break;
2712 case RSPQ_NSOP_NEOP:
2713 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2714 panic("chaining unsupported");
2715 ret = 0;
2716 break;
2717 case RSPQ_SOP:
2718 DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2719 panic("chaining unsupported");
2720 m_iovinit(m0);
2721 ret = 0;
2722 break;
2723 case RSPQ_EOP:
2724 DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2725 panic("chaining unsupported");
2726 ret = 1;
2727 break;
2728 }
2729 panic("append not supported");
2730#if 0
2731 m_iovappend(m0, cl, fl->buf_size, len, sizeof(uint32_t), sd->rxsd_ref);
2732#endif
2733done:
2734 if (++fl->cidx == fl->size)
2735 fl->cidx = 0;
2736
2737 return (ret);
2738}
2739#endif
2740/**
2741 * handle_rsp_cntrl_info - handles control information in a response
2742 * @qs: the queue set corresponding to the response
2743 * @flags: the response control flags
2744 *
2745 * Handles the control information of an SGE response, such as GTS
2746 * indications and completion credits for the queue set's Tx queues.
2747 *	HW coalesces credits; we don't do any extra SW coalescing.
2748 */
2749static __inline void
2750handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2751{
2752 unsigned int credits;
2753
2754#if USE_GTS
2755 if (flags & F_RSPD_TXQ0_GTS)
2756 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2757#endif
2758 credits = G_RSPD_TXQ0_CR(flags);
2759 if (credits)
2760 qs->txq[TXQ_ETH].processed += credits;
2761
2762 credits = G_RSPD_TXQ2_CR(flags);
2763 if (credits)
2764 qs->txq[TXQ_CTRL].processed += credits;
2765
2766# if USE_GTS
2767 if (flags & F_RSPD_TXQ1_GTS)
2768 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2769# endif
2770 credits = G_RSPD_TXQ1_CR(flags);
2771 if (credits)
2772 qs->txq[TXQ_OFLD].processed += credits;
2773
2774}
2775
2776static void
2777check_ring_db(adapter_t *adap, struct sge_qset *qs,
2778 unsigned int sleeping)
2779{
2780 ;
2781}
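/*
 * check_ring_db() is an empty stub here; with USE_GTS disabled the driver
 * never parks a Tx queue waiting on a GTS doorbell, so there appears to be
 * nothing to ring (an inference from the USE_GTS blocks above).
 */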
2782
2783/**
2784 * process_responses - process responses from an SGE response queue
2785 * @adap: the adapter
2786 * @qs: the queue set to which the response queue belongs
2787 * @budget: how many responses can be processed in this round
2788 *
2789 * Process responses from an SGE response queue up to the supplied budget.
2790 * Responses include received packets as well as credits and other events
2791 * for the queues that belong to the response queue's queue set.
2792 * A negative budget is effectively unlimited.
2793 *
2794 * Additionally choose the interrupt holdoff time for the next interrupt
2795 * on this queue. If the system is under memory shortage use a fairly
2796 * long delay to help recovery.
2797 */
2798int
2799process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2800{
2801 struct sge_rspq *rspq = &qs->rspq;
2802 struct rsp_desc *r = &rspq->desc[rspq->cidx];
2803 int budget_left = budget;
2804 unsigned int sleeping = 0;
2805 int lro_enabled = qs->lro.enabled;
2806 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
2807 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
2808 int ngathered = 0;
2809#ifdef DEBUG
2810 static int last_holdoff = 0;
2811 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2812 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2813 last_holdoff = rspq->holdoff_tmr;
2814 }
2815#endif
2816 rspq->next_holdoff = rspq->holdoff_tmr;
2817
2818 while (__predict_true(budget_left && is_new_response(r, rspq))) {
2819 int eth, eop = 0, ethpad = 0;
2820 uint32_t flags = ntohl(r->flags);
2821 uint32_t rss_csum = *(const uint32_t *)r;
2822 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
2823
2824 eth = (r->rss_hdr.opcode == CPL_RX_PKT);
2825
2826 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2827 struct mbuf *m;
2828
2829 if (cxgb_debug)
2830 printf("async notification\n");
2831
2832 if (rspq->rspq_mh.mh_head == NULL) {
2833 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
2834 m = rspq->rspq_mh.mh_head;
2835 } else {
2836 m = m_gethdr(M_DONTWAIT, MT_DATA);
2837 }
2838
2839 /* XXX m is lost here if rspq->rspq_mbuf is not NULL */
2840
2841 if (m == NULL)
2842 goto no_mem;
2843
2844 memcpy(mtod(m, char *), r, AN_PKT_SIZE);
2845 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
2846 *mtod(m, char *) = CPL_ASYNC_NOTIF;
2847 rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
2848 eop = 1;
2849 rspq->async_notif++;
2850 goto skip;
2851 } else if (flags & F_RSPD_IMM_DATA_VALID) {
2852 struct mbuf *m = NULL;
2853
2854 DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
2855 r->rss_hdr.opcode, rspq->cidx);
2856 if (rspq->rspq_mh.mh_head == NULL)
2857 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
2858 else
2859 m = m_gethdr(M_DONTWAIT, MT_DATA);
2860
2861 if (rspq->rspq_mh.mh_head == NULL && m == NULL) {
2862 no_mem:
2863 rspq->next_holdoff = NOMEM_INTR_DELAY;
2864 budget_left--;
2865 break;
2866 }
2867 get_imm_packet(adap, r, rspq->rspq_mh.mh_head);
2868 eop = 1;
2869 rspq->imm_data++;
2870 } else if (r->len_cq) {
2871 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
2872
2873#ifdef DISABLE_MBUF_IOVEC
2874 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r);
2875#else
2876 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mbuf, r);
2877#endif
2878#ifdef IFNET_MULTIQUEUE
2879 rspq->rspq_mh.mh_head->m_pkthdr.rss_hash = rss_hash;
2880#endif
2881 ethpad = 2;
2882 } else {
2883 DPRINTF("pure response\n");
2884 rspq->pure_rsps++;
2885 }
2886 skip:
2887 if (flags & RSPD_CTRL_MASK) {
2888 sleeping |= flags & RSPD_GTS_MASK;
2889 handle_rsp_cntrl_info(qs, flags);
2890 }
2891
2892 r++;
2893 if (__predict_false(++rspq->cidx == rspq->size)) {
2894 rspq->cidx = 0;
2895 rspq->gen ^= 1;
2896 r = rspq->desc;
2897 }
2898 prefetch(r);
2899 if (++rspq->credits >= (rspq->size / 4)) {
2900 refill_rspq(adap, rspq, rspq->credits);
2901 rspq->credits = 0;
2902 }
2903 DPRINTF("eth=%d eop=%d flags=0x%x\n", eth, eop, flags);
2904
2905 if (!eth && eop) {
2906 rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
2907 /*
2908 * XXX size mismatch
2909 */
2910 m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
2911
2912
2913 ngathered = rx_offload(&adap->tdev, rspq,
2914 rspq->rspq_mh.mh_head, offload_mbufs, ngathered);
2915 rspq->rspq_mh.mh_head = NULL;
2916 DPRINTF("received offload packet\n");
2917
2918 } else if (eth && eop) {
2919 struct mbuf *m = rspq->rspq_mh.mh_head;
2920 prefetch(mtod(m, uint8_t *));
2921 prefetch(mtod(m, uint8_t *) + L1_CACHE_BYTES);
2922
2923 t3_rx_eth(adap, rspq, m, ethpad);
2924 if (lro_enabled && lro_ctrl->lro_cnt &&
2925 (tcp_lro_rx(lro_ctrl, m, 0) == 0)) {
2926				/* successfully queued for LRO */
2927 } else {
2928 /*
2929 * LRO not enabled, packet unsuitable for LRO,
2930 * or unable to queue. Pass it up right now in
2931 * either case.
2932 */
2933 struct ifnet *ifp = m->m_pkthdr.rcvif;
2934 (*ifp->if_input)(ifp, m);
2935 }
2936 DPRINTF("received tunnel packet\n");
2937 rspq->rspq_mh.mh_head = NULL;
2938
2939 }
2940 __refill_fl_lt(adap, &qs->fl[0], 32);
2941 __refill_fl_lt(adap, &qs->fl[1], 32);
2942 --budget_left;
2943 }
2944
2945 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
2946
2947 /* Flush LRO */
2948 while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
2949 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
2950 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next);
2951 tcp_lro_flush(lro_ctrl, queued);
2952 }
2953
2954 if (sleeping)
2955 check_ring_db(adap, qs, sleeping);
2956
2957 smp_mb(); /* commit Tx queue processed updates */
2958 if (__predict_false(qs->txq_stopped > 1)) {
2959 printf("restarting tx on %p\n", qs);
2960
2961 restart_tx(qs);
2962 }
2963
2964 __refill_fl_lt(adap, &qs->fl[0], 512);
2965 __refill_fl_lt(adap, &qs->fl[1], 512);
2966 budget -= budget_left;
2967 return (budget);
2968}
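/*
 * The value returned above is the number of responses actually consumed
 * (budget minus budget_left); t3_intr_msi() treats a non-zero return as
 * "this queue produced new work".
 */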
2969
2970/*
2971 * A helper function that processes responses and issues GTS.
2972 */
2973static __inline int
2974process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
2975{
2976 int work;
2977 static int last_holdoff = 0;
2978
2979 work = process_responses(adap, rspq_to_qset(rq), -1);
2980
2981 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
2982 printf("next_holdoff=%d\n", rq->next_holdoff);
2983 last_holdoff = rq->next_holdoff;
2984 }
2985 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2986 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2987
2988 return (work);
2989}
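/*
 * The A_SG_GTS write above both acknowledges the responses just processed
 * (V_NEWINDEX(cidx)) and arms the next interrupt holdoff timer for this
 * response queue (V_NEWTIMER(next_holdoff)).
 */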
2990
2991
2992/*
2993 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2994 * Handles data events from SGE response queues as well as error and other
2995 * async events as they all use the same interrupt pin. We use one SGE
2996 * response queue per port in this mode and protect all response queues with
2997 * queue 0's lock.
2998 */
2999void
3000t3b_intr(void *data)
3001{
3002 uint32_t i, map;
3003 adapter_t *adap = data;
3004 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3005
3006 t3_write_reg(adap, A_PL_CLI, 0);
3007 map = t3_read_reg(adap, A_SG_DATA_INTR);
3008
3009 if (!map)
3010 return;
3011
3012 if (__predict_false(map & F_ERRINTR))
3013 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3014
3015 mtx_lock(&q0->lock);
3016 for_each_port(adap, i)
3017 if (map & (1 << i))
3018 process_responses_gts(adap, &adap->sge.qs[i].rspq);
3019 mtx_unlock(&q0->lock);
3020}
3021
3022/*
3023 * The MSI interrupt handler. This needs to handle data events from SGE
3024 * response queues as well as error and other async events as they all use
3025 * the same MSI vector. We use one SGE response queue per port in this mode
3026 * and protect all response queues with queue 0's lock.
3027 */
3028void
3029t3_intr_msi(void *data)
3030{
3031 adapter_t *adap = data;
3032 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3033 int i, new_packets = 0;
3034
3035 mtx_lock(&q0->lock);
3036
3037 for_each_port(adap, i)
3038 if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
3039 new_packets = 1;
3040 mtx_unlock(&q0->lock);
3041 if (new_packets == 0)
3042 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3043}
3044
3045void
3046t3_intr_msix(void *data)
3047{
3048 struct sge_qset *qs = data;
3049 adapter_t *adap = qs->port->adapter;
3050 struct sge_rspq *rspq = &qs->rspq;
3051#ifndef IFNET_MULTIQUEUE
3052 mtx_lock(&rspq->lock);
3053#else
3054 if (mtx_trylock(&rspq->lock))
3055#endif
3056 {
3057
3058 if (process_responses_gts(adap, rspq) == 0)
3059 rspq->unhandled_irqs++;
3060 mtx_unlock(&rspq->lock);
3061 }
3062}
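/*
 * Under IFNET_MULTIQUEUE the MSI-X handler uses mtx_trylock() so that an
 * interrupt for a queue set that is already being serviced simply returns
 * instead of blocking on the response queue lock.
 */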
3063
3064#define QDUMP_SBUF_SIZE (32 * 400)
3065static int
3066t3_dump_rspq(SYSCTL_HANDLER_ARGS)
3067{
3068 struct sge_rspq *rspq;
3069 struct sge_qset *qs;
3070 int i, err, dump_end, idx;
3071 static int multiplier = 1;
3072 struct sbuf *sb;
3073 struct rsp_desc *rspd;
3074 uint32_t data[4];
3075
3076 rspq = arg1;
3077 qs = rspq_to_qset(rspq);
3078 if (rspq->rspq_dump_count == 0)
3079 return (0);
3080 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
3081 log(LOG_WARNING,
3082 "dump count is too large %d\n", rspq->rspq_dump_count);
3083 rspq->rspq_dump_count = 0;
3084 return (EINVAL);
3085 }
3086 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
3087 log(LOG_WARNING,
3088 "dump start of %d is greater than queue size\n",
3089 rspq->rspq_dump_start);
3090 rspq->rspq_dump_start = 0;
3091 return (EINVAL);
3092 }
3093 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
3094 if (err)
3095 return (err);
3096retry_sbufops:
3097 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3098
3099 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
3100 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
3101 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
3102 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
3103 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
3104
3105 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
3106 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
3107
3108 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
3109 for (i = rspq->rspq_dump_start; i < dump_end; i++) {
3110 idx = i & (RSPQ_Q_SIZE-1);
3111
3112 rspd = &rspq->desc[idx];
3113 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
3114 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
3115 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
3116 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
3117 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
3118 be32toh(rspd->len_cq), rspd->intr_gen);
3119 }
3120 if (sbuf_overflowed(sb)) {
3121 sbuf_delete(sb);
3122 multiplier++;
3123 goto retry_sbufops;
3124 }
3125 sbuf_finish(sb);
3126 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3127 sbuf_delete(sb);
3128 return (err);
3129}
3130
3131static int
3132t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
3133{
3134 struct sge_txq *txq;
3135 struct sge_qset *qs;
3136 int i, j, err, dump_end;
3137 static int multiplier = 1;
3138 struct sbuf *sb;
3139 struct tx_desc *txd;
3140 uint32_t *WR, wr_hi, wr_lo, gen;
3141 uint32_t data[4];
3142
3143 txq = arg1;
3144 qs = txq_to_qset(txq, TXQ_ETH);
3145 if (txq->txq_dump_count == 0) {
3146 return (0);
3147 }
3148 if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
3149 log(LOG_WARNING,
3150 "dump count is too large %d\n", txq->txq_dump_count);
3151 txq->txq_dump_count = 1;
3152 return (EINVAL);
3153 }
3154 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
3155 log(LOG_WARNING,
3156 "dump start of %d is greater than queue size\n",
3157 txq->txq_dump_start);
3158 txq->txq_dump_start = 0;
3159 return (EINVAL);
3160 }
3161 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
3162 if (err)
3163 return (err);
3164
3165
3166retry_sbufops:
3167 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3168
3169 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
3170 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
3171 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
3172	sbuf_printf(sb, " TUN=%u TOE=%u generation=%u uP token=%u valid=%u\n",
3173 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
3174 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
3175 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3176 txq->txq_dump_start,
3177 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
3178
3179 dump_end = txq->txq_dump_start + txq->txq_dump_count;
3180 for (i = txq->txq_dump_start; i < dump_end; i++) {
3181 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
3182 WR = (uint32_t *)txd->flit;
3183 wr_hi = ntohl(WR[0]);
3184 wr_lo = ntohl(WR[1]);
3185 gen = G_WR_GEN(wr_lo);
3186
3187 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3188 wr_hi, wr_lo, gen);
3189 for (j = 2; j < 30; j += 4)
3190 sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3191 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3192
3193 }
3194 if (sbuf_overflowed(sb)) {
3195 sbuf_delete(sb);
3196 multiplier++;
3197 goto retry_sbufops;
3198 }
3199 sbuf_finish(sb);
3200 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3201 sbuf_delete(sb);
3202 return (err);
3203}
3204
3205static int
3206t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
3207{
3208 struct sge_txq *txq;
3209 struct sge_qset *qs;
3210 int i, j, err, dump_end;
3211 static int multiplier = 1;
3212 struct sbuf *sb;
3213 struct tx_desc *txd;
3214 uint32_t *WR, wr_hi, wr_lo, gen;
3215
3216 txq = arg1;
3217 qs = txq_to_qset(txq, TXQ_CTRL);
3218 if (txq->txq_dump_count == 0) {
3219 return (0);
3220 }
3221 if (txq->txq_dump_count > 256) {
3222 log(LOG_WARNING,
3223 "dump count is too large %d\n", txq->txq_dump_count);
3224 txq->txq_dump_count = 1;
3225 return (EINVAL);
3226 }
3227 if (txq->txq_dump_start > 255) {
3228 log(LOG_WARNING,
3229 "dump start of %d is greater than queue size\n",
3230 txq->txq_dump_start);
3231 txq->txq_dump_start = 0;
3232 return (EINVAL);
3233 }
3234
3235retry_sbufops:
3236 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3237 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3238 txq->txq_dump_start,
3239 (txq->txq_dump_start + txq->txq_dump_count) & 255);
3240
3241 dump_end = txq->txq_dump_start + txq->txq_dump_count;
3242 for (i = txq->txq_dump_start; i < dump_end; i++) {
3243 txd = &txq->desc[i & (255)];
3244 WR = (uint32_t *)txd->flit;
3245 wr_hi = ntohl(WR[0]);
3246 wr_lo = ntohl(WR[1]);
3247 gen = G_WR_GEN(wr_lo);
3248
3249 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3250 wr_hi, wr_lo, gen);
3251 for (j = 2; j < 30; j += 4)
3252 sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3253 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3254
3255 }
3256 if (sbuf_overflowed(sb)) {
3257 sbuf_delete(sb);
3258 multiplier++;
3259 goto retry_sbufops;
3260 }
3261 sbuf_finish(sb);
3262 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3263 sbuf_delete(sb);
3264 return (err);
3265}
3266
3267static int
3268t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
3269{
3270 adapter_t *sc = arg1;
3271 struct qset_params *qsp = &sc->params.sge.qset[0];
3272 int coalesce_usecs;
3273 struct sge_qset *qs;
3274 int i, j, err, nqsets = 0;
3275 struct mtx *lock;
3276
3277 if ((sc->flags & FULL_INIT_DONE) == 0)
3278 return (ENXIO);
3279
3280 coalesce_usecs = qsp->coalesce_usecs;
3281 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
3282
3283 if (err != 0) {
3284 return (err);
3285 }
3286 if (coalesce_usecs == qsp->coalesce_usecs)
3287 return (0);
3288
3289 for (i = 0; i < sc->params.nports; i++)
3290 for (j = 0; j < sc->port[i].nqsets; j++)
3291 nqsets++;
3292
3293 coalesce_usecs = max(1, coalesce_usecs);
3294
3295 for (i = 0; i < nqsets; i++) {
3296 qs = &sc->sge.qs[i];
3297 qsp = &sc->params.sge.qset[i];
3298 qsp->coalesce_usecs = coalesce_usecs;
3299
3300 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
3301 &sc->sge.qs[0].rspq.lock;
3302
3303 mtx_lock(lock);
3304 t3_update_qset_coalesce(qs, qsp);
3305 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
3306 V_NEWTIMER(qs->rspq.holdoff_tmr));
3307 mtx_unlock(lock);
3308 }
3309
3310 return (0);
3311}
3312
3313
3314void
3315t3_add_attach_sysctls(adapter_t *sc)
3316{
3317 struct sysctl_ctx_list *ctx;
3318 struct sysctl_oid_list *children;
3319
3320 ctx = device_get_sysctl_ctx(sc->dev);
3321 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3322
3323 /* random information */
3324 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3325 "firmware_version",
3326 CTLFLAG_RD, &sc->fw_version,
3327 0, "firmware version");
3328 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3329 "hw_revision",
3330 CTLFLAG_RD, &sc->params.rev,
3331	    0, "chip hardware revision");
3332 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3333 "enable_debug",
3334 CTLFLAG_RW, &cxgb_debug,
3335 0, "enable verbose debugging output");
3336 SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tunq_coalesce",
3337 CTLFLAG_RD, &sc->tunq_coalesce,
3338 "#tunneled packets freed");
3339 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3340 "txq_overrun",
3341 CTLFLAG_RD, &txq_fills,
3342 0, "#times txq overrun");
3343 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3344 "pcpu_cache_enable",
3345 CTLFLAG_RW, &cxgb_pcpu_cache_enable,
3346	    0, "enable driver local pcpu caches");
3347 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3348 "cache_alloc",
3349 CTLFLAG_RD, &cxgb_cached_allocations,
3350 0, "#times a cluster was allocated from cache");
3351 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3352 "cached",
3353 CTLFLAG_RD, &cxgb_cached,
3354 0, "#times a cluster was cached");
3355 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3356 "ext_freed",
3357 CTLFLAG_RD, &cxgb_ext_freed,
3358 0, "#times a cluster was freed through ext_free");
3359 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3360 "ext_inited",
3361 CTLFLAG_RD, &cxgb_ext_inited,
3362 0, "#times a cluster was initialized for ext_free");
3363 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3364 "mbufs_outstanding",
3365 CTLFLAG_RD, &cxgb_mbufs_outstanding,
3366 0, "#mbufs in flight in the driver");
3367 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3368 "pack_outstanding",
3369 CTLFLAG_RD, &cxgb_pack_outstanding,
3370	    0, "#packets in flight in the driver");
3371}
3372
3373
3374static const char *rspq_name = "rspq";
3375static const char *txq_names[] =
3376{
3377 "txq_eth",
3378 "txq_ofld",
3379 "txq_ctrl"
3380};
3381
3382static int
3383sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
3384{
3385 struct port_info *p = arg1;
3386 uint64_t *parg;
3387
3388 if (!p)
3389 return (EINVAL);
3390
3391 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
3392
3393 PORT_LOCK(p);
3394 t3_mac_update_stats(&p->mac);
3395 PORT_UNLOCK(p);
3396
3397 return (sysctl_handle_quad(oidp, parg, 0, req));
3398}
3399
3400void
3401t3_add_configured_sysctls(adapter_t *sc)
3402{
3403 struct sysctl_ctx_list *ctx;
3404 struct sysctl_oid_list *children;
3405 int i, j;
3406
3407 ctx = device_get_sysctl_ctx(sc->dev);
3408 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3409
3410 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3411 "intr_coal",
3412 CTLTYPE_INT|CTLFLAG_RW, sc,
3413 0, t3_set_coalesce_usecs,
3414 "I", "interrupt coalescing timer (us)");
3415
3416 for (i = 0; i < sc->params.nports; i++) {
3417 struct port_info *pi = &sc->port[i];
3418 struct sysctl_oid *poid;
3419 struct sysctl_oid_list *poidlist;
3420 struct mac_stats *mstats = &pi->mac.stats;
3421
3422 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
3423 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
3424 pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
3425 poidlist = SYSCTL_CHILDREN(poid);
3426 SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO,
3427 "nqsets", CTLFLAG_RD, &pi->nqsets,
3428 0, "#queue sets");
3429
3430 for (j = 0; j < pi->nqsets; j++) {
3431 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
3432 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, *ctrlqpoid, *lropoid;
3433 struct sysctl_oid_list *qspoidlist, *rspqpoidlist, *txqpoidlist, *ctrlqpoidlist, *lropoidlist;
3434 struct sge_txq *txq = &qs->txq[TXQ_ETH];
3435
3436 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
3437
3438 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
3439 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
3440 qspoidlist = SYSCTL_CHILDREN(qspoid);
3441
3442 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3443 rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
3444 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
3445
3446 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3447 txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
3448 txqpoidlist = SYSCTL_CHILDREN(txqpoid);
3449
3450 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3451 txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
3452 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
3453
3454 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3455 "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
3456 lropoidlist = SYSCTL_CHILDREN(lropoid);
3457
3458 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
3459 CTLFLAG_RD, &qs->rspq.size,
3460 0, "#entries in response queue");
3461 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
3462 CTLFLAG_RD, &qs->rspq.cidx,
3463 0, "consumer index");
3464 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
3465 CTLFLAG_RD, &qs->rspq.credits,
3466 0, "#credits");
3467 SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
3468 CTLFLAG_RD, &qs->rspq.phys_addr,
3469	    "physical address of the queue");
3470 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
3471 CTLFLAG_RW, &qs->rspq.rspq_dump_start,
3472 0, "start rspq dump entry");
3473 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
3474 CTLFLAG_RW, &qs->rspq.rspq_dump_count,
3475 0, "#rspq entries to dump");
3476 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
3477 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
3478 0, t3_dump_rspq, "A", "dump of the response queue");
3479
3480
3481 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped",
3482 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops,
3483 0, "#tunneled packets dropped");
3484 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
3485 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
3486 0, "#tunneled packets waiting to be sent");
3487 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
3488 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
3489 0, "#tunneled packets queue producer index");
3490 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
3491 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
3492 0, "#tunneled packets queue consumer index");
3493 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
3494 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
3495 0, "#tunneled packets processed by the card");
3496 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
3497 CTLFLAG_RD, &txq->cleaned,
3498 0, "#tunneled packets cleaned");
3499 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
3500 CTLFLAG_RD, &txq->in_use,
3501 0, "#tunneled packet slots in use");
3502 SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
3503 CTLFLAG_RD, &txq->txq_frees,
3504 "#tunneled packets freed");
3505 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
3506 CTLFLAG_RD, &txq->txq_skipped,
3507 0, "#tunneled packet descriptors skipped");
3508 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "coalesced",
3509 CTLFLAG_RD, &txq->txq_coalesced,
3510 0, "#tunneled packets coalesced");
3511 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
3512 CTLFLAG_RD, &txq->txq_enqueued,
3513 0, "#tunneled packets enqueued to hardware");
3514 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
3515 CTLFLAG_RD, &qs->txq_stopped,
3516 0, "tx queues stopped");
3517 SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
3518 CTLFLAG_RD, &txq->phys_addr,
3519	    "physical address of the queue");
3520 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
3521 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
3522 0, "txq generation");
3523 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
3524 CTLFLAG_RD, &txq->cidx,
3525 0, "hardware queue cidx");
3526 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
3527 CTLFLAG_RD, &txq->pidx,
3528 0, "hardware queue pidx");
3529 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
3530 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
3531 0, "txq start idx for dump");
3532 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
3533 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
3534 0, "txq #entries to dump");
3535 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
3536 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
3537 0, t3_dump_txq_eth, "A", "dump of the transmit queue");
3538
3539 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
3540 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
3541 0, "ctrlq start idx for dump");
3542 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
3543 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
3544 0, "ctrl #entries to dump");
3545 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
3546 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
3547 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
3548
3549 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued",
3550 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
3551 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed",
3552 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
3553 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
3554 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
3555 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
3556 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
3557 }
3558
3559 /* Now add a node for mac stats. */
3560 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
3561 CTLFLAG_RD, NULL, "MAC statistics");
3562 poidlist = SYSCTL_CHILDREN(poid);
3563
3564 /*
3565 * We (ab)use the length argument (arg2) to pass on the offset
3566 * of the data that we are interested in. This is only required
3567 * for the quad counters that are updated from the hardware (we
3568 * make sure that we return the latest value).
3569 * sysctl_handle_macstat first updates *all* the counters from
3570 * the hardware, and then returns the latest value of the
3571 * requested counter. Best would be to update only the
3572 * requested counter from hardware, but t3_mac_update_stats()
3573 * hides all the register details and we don't want to dive into
3574 * all that here.
3575 */
3576#define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
3577 (CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
3578 sysctl_handle_macstat, "QU", 0)
3579 CXGB_SYSCTL_ADD_QUAD(tx_octets);
3580 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
3581 CXGB_SYSCTL_ADD_QUAD(tx_frames);
3582 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
3583 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
3584 CXGB_SYSCTL_ADD_QUAD(tx_pause);
3585 CXGB_SYSCTL_ADD_QUAD(tx_deferred);
3586 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
3587 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
3588 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
3589 CXGB_SYSCTL_ADD_QUAD(tx_underrun);
3590 CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
3591 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
3592 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
3593 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
3594 CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
3595 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
3596 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
3597 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
3598 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
3599 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
3600 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
3601 CXGB_SYSCTL_ADD_QUAD(rx_octets);
3602 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
3603 CXGB_SYSCTL_ADD_QUAD(rx_frames);
3604 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
3605 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
3606 CXGB_SYSCTL_ADD_QUAD(rx_pause);
3607 CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
3608 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
3609 CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
3610 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
3611 CXGB_SYSCTL_ADD_QUAD(rx_runt);
3612 CXGB_SYSCTL_ADD_QUAD(rx_jabber);
3613 CXGB_SYSCTL_ADD_QUAD(rx_short);
3614 CXGB_SYSCTL_ADD_QUAD(rx_too_long);
3615 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
3616 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
3617 CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
3618 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
3619 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
3620 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
3621 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
3622 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
3623 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
3624#undef CXGB_SYSCTL_ADD_QUAD
3625
3626#define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
3627 CTLFLAG_RD, &mstats->a, 0)
3628 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
3629 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
3630 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
3631 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
3632 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
3633 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
3634 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
3635 CXGB_SYSCTL_ADD_ULONG(num_toggled);
3636 CXGB_SYSCTL_ADD_ULONG(num_resets);
3637#undef CXGB_SYSCTL_ADD_ULONG
3638 }
3639}
3640
3641/**
3642 * t3_get_desc - dump an SGE descriptor for debugging purposes
3643 * @qs: the queue set
3644 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3645 * @idx: the descriptor index in the queue
3646 * @data: where to dump the descriptor contents
3647 *
3648 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
3649 * size of the descriptor.
3650 */
3651int
3652t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3653 unsigned char *data)
3654{
3655 if (qnum >= 6)
3656 return (EINVAL);
3657
3658 if (qnum < 3) {
3659 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3660 return -EINVAL;
3661 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3662 return sizeof(struct tx_desc);
3663 }
3664
3665 if (qnum == 3) {
3666 if (!qs->rspq.desc || idx >= qs->rspq.size)
3667 return (EINVAL);
3668 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3669 return sizeof(struct rsp_desc);
3670 }
3671
3672 qnum -= 4;
3673 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3674 return (EINVAL);
3675 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3676 return sizeof(struct rx_desc);
3677}
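/*
 * Example of the qnum mapping documented above: qnum 0-2 copy a Tx
 * descriptor, qnum 3 copies a response descriptor, and qnum 4 or 5 copies
 * a descriptor from free list (qnum - 4).
 */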