1/**************************************************************************
2
3Copyright (c) 2007, Chelsio Inc.
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
15
16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26POSSIBILITY OF SUCH DAMAGE.
27
28***************************************************************************/
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_sge.c 170869 2007-06-17 04:33:38Z kmacy $");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/kernel.h>
36#include <sys/module.h>
37#include <sys/bus.h>
38#include <sys/conf.h>
39#include <machine/bus.h>
40#include <machine/resource.h>
41#include <sys/bus_dma.h>
42#include <sys/rman.h>
43#include <sys/queue.h>
44#include <sys/sysctl.h>
45#include <sys/taskqueue.h>
46
47
48#include <sys/proc.h>
49#include <sys/sched.h>
50#include <sys/smp.h>
51#include <sys/systm.h>
52
53#include <netinet/in_systm.h>
54#include <netinet/in.h>
55#include <netinet/ip.h>
56#include <netinet/tcp.h>
57
58#include <dev/pci/pcireg.h>
59#include <dev/pci/pcivar.h>
60
61#ifdef CONFIG_DEFINED
62#include <cxgb_include.h>
63#else
64#include <dev/cxgb/cxgb_include.h>
65#endif
66
67uint32_t collapse_free = 0;
68uint32_t mb_free_vec_free = 0;
69int collapse_mbufs = 0;
70static int recycle_enable = 1;
71
72
73/*
74 * XXX GC
75 */
76#define NET_XMIT_CN 2
77#define NET_XMIT_SUCCESS 0
78
79#define USE_GTS 0
80
81#define SGE_RX_SM_BUF_SIZE 1536
82#define SGE_RX_DROP_THRES 16
83#define SGE_RX_COPY_THRES 128
84
85/*
86 * Period of the Tx buffer reclaim timer. This timer does not need to run
87 * frequently as Tx buffers are usually reclaimed by new Tx packets.
88 */
89#define TX_RECLAIM_PERIOD (hz >> 1)
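/* With hz clock ticks per second, hz >> 1 works out to roughly half a second. */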
90
91/*
92 * work request size in bytes
93 */
94#define WR_LEN (WR_FLITS * 8)
95
96/*
97 * Values for sge_txq.flags
98 */
99enum {
100 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
101 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
102};
103
104struct tx_desc {
105 uint64_t flit[TX_DESC_FLITS];
106} __packed;
107
108struct rx_desc {
109 uint32_t addr_lo;
110 uint32_t len_gen;
111 uint32_t gen2;
112 uint32_t addr_hi;
113} __packed;
114
115struct rsp_desc { /* response queue descriptor */
116 struct rss_header rss_hdr;
117 uint32_t flags;
118 uint32_t len_cq;
119 uint8_t imm_data[47];
120 uint8_t intr_gen;
121} __packed;
122
123#define RX_SW_DESC_MAP_CREATED (1 << 0)
124#define TX_SW_DESC_MAP_CREATED (1 << 1)
125#define RX_SW_DESC_INUSE (1 << 3)
126#define TX_SW_DESC_MAPPED (1 << 4)
127
128#define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0)
129#define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP)
130#define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP)
131#define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
132
133struct tx_sw_desc { /* SW state per Tx descriptor */
134 struct mbuf *m;
135 bus_dmamap_t map;
136 int flags;
137};
138
139struct rx_sw_desc { /* SW state per Rx descriptor */
140 void *cl;
141 bus_dmamap_t map;
142 int flags;
143};
144
145struct txq_state {
146 unsigned int compl;
147 unsigned int gen;
148 unsigned int pidx;
149};
150
151struct refill_fl_cb_arg {
152 int error;
153 bus_dma_segment_t seg;
154 int nseg;
155};
156
157/*
158 * Maps a number of flits to the number of Tx descriptors that can hold them.
159 * The formula is
160 *
161 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
162 *
163 * HW allows up to 4 descriptors to be combined into a WR.
164 */
165static uint8_t flit_desc_map[] = {
166 0,
167#if SGE_NUM_GENBITS == 1
168 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
169 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
170 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
171 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
172#elif SGE_NUM_GENBITS == 2
173 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
174 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
175 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
176 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
177#else
178# error "SGE_NUM_GENBITS must be 1 or 2"
179#endif
180};
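/*
 * Worked example, reading the table above directly: with SGE_NUM_GENBITS == 2
 * a 20-flit work request maps to flit_desc_map[20] == 2 Tx descriptors, while
 * anything from 1 to 15 flits still fits in a single descriptor.
 */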
181
182
183static int lro_default = 0;
184int cxgb_debug = 0;
185
186static void t3_free_qset(adapter_t *sc, struct sge_qset *q);
187static void sge_timer_cb(void *arg);
188static void sge_timer_reclaim(void *arg, int ncount);
189static int free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec);
190
191/**
192 * reclaim_completed_tx - reclaims completed Tx descriptors
193 * @adapter: the adapter
194 * @q: the Tx queue to reclaim completed descriptors from
195 *
196 * Reclaims Tx descriptors that the SGE has indicated it has processed,
197 * and frees the associated buffers if possible. Called with the Tx
198 * queue's lock held.
199 */
200static __inline int
201reclaim_completed_tx(adapter_t *adapter, struct sge_txq *q, int nbufs, struct mbuf **mvec)
202{
203 int reclaimed, reclaim = desc_reclaimable(q);
204 int n = 0;
205
206 mtx_assert(&q->lock, MA_OWNED);
207 if (reclaim > 0) {
208 n = free_tx_desc(adapter, q, min(reclaim, nbufs), mvec);
209 reclaimed = min(reclaim, nbufs);
210 q->cleaned += reclaimed;
211 q->in_use -= reclaimed;
212 }
213 return (n);
214}
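/*
 * Illustrative call pattern (mirroring sge_timer_reclaim() below): hold the
 * queue lock only for the reclaim itself and free the returned mbufs after
 * dropping it:
 *
 *	mtx_lock(&txq->lock);
 *	n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec);
 *	mtx_unlock(&txq->lock);
 *	for (i = 0; i < n; i++)
 *		m_freem_vec(m_vec[i]);
 */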
215
216/**
217 * should_restart_tx - are there enough resources to restart a Tx queue?
218 * @q: the Tx queue
219 *
220 * Checks if there are enough descriptors to restart a suspended Tx queue.
221 */
222static __inline int
223should_restart_tx(const struct sge_txq *q)
224{
225 unsigned int r = q->processed - q->cleaned;
226
227 return q->in_use - r < (q->size >> 1);
228}
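/*
 * That is, restart once fewer than half of the queue's descriptors are still
 * in use, counting descriptors that are already reclaimable as free.
 */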
229
230/**
231 * t3_sge_init - initialize SGE
232 * @adap: the adapter
233 * @p: the SGE parameters
234 *
235 * Performs SGE initialization needed every time after a chip reset.
236 * We do not initialize any of the queue sets here; instead the driver
237 * top-level must request those individually. We also do not enable DMA
238 * here; that should be done after the queues have been set up.
239 */
240void
241t3_sge_init(adapter_t *adap, struct sge_params *p)
242{
243 u_int ctrl, ups;
244
245 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
246
247 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
248 F_CQCRDTCTRL |
249 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
250 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
251#if SGE_NUM_GENBITS == 1
252 ctrl |= F_EGRGENCTRL;
253#endif
254 if (adap->params.rev > 0) {
255 if (!(adap->flags & (USING_MSIX | USING_MSI)))
256 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
257 ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL;
258 }
259 t3_write_reg(adap, A_SG_CONTROL, ctrl);
260 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
261 V_LORCQDRBTHRSH(512));
262 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
263 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
264 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
265 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000);
266 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
267 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
268 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
269 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
270 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
271}
272
273
274/**
275 * sgl_len - calculates the size of an SGL of the given capacity
276 * @n: the number of SGL entries
277 *
278 * Calculates the number of flits needed for a scatter/gather list that
279 * can hold the given number of entries.
280 */
281static __inline unsigned int
282sgl_len(unsigned int n)
283{
284 return ((3 * n) / 2 + (n & 1));
285}
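/*
 * Each struct sg_ent packs two 64-bit addresses and two 32-bit lengths into
 * three flits, i.e. three flits per pair of entries plus one extra flit for
 * an odd final entry; e.g. sgl_len(3) = 4 + 1 = 5 flits.
 */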
286
287/**
288 * get_imm_packet - return the next ingress packet buffer from a response
289 * @resp: the response descriptor containing the packet data
290 *
291 * Return a packet containing the immediate data of the given response.
292 */
293static __inline void
294get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl)
295{
296 int len;
297 uint32_t flags = ntohl(resp->flags);
298 uint8_t sopeop = G_RSPD_SOP_EOP(flags);
299
300 /*
301 * would be a firmware bug
302 */
303 if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP)
304 return;
305
306 len = G_RSPD_LEN(ntohl(resp->len_cq));
307 switch (sopeop) {
308 case RSPQ_SOP_EOP:
309 m->m_len = m->m_pkthdr.len = len;
310 memcpy(mtod(m, uint8_t *), resp->imm_data, len);
311 break;
312 case RSPQ_EOP:
313 memcpy(cl, resp->imm_data, len);
314 m_iovappend(m, cl, MSIZE, len, 0);
315 break;
316 }
317}
318
319
320static __inline u_int
321flits_to_desc(u_int n)
322{
323 return (flit_desc_map[n]);
324}
325
326void
327t3_sge_err_intr_handler(adapter_t *adapter)
328{
329 unsigned int v, status;
330
331
332 status = t3_read_reg(adapter, A_SG_INT_CAUSE);
333
334 if (status & F_RSPQCREDITOVERFOW)
335 CH_ALERT(adapter, "SGE response queue credit overflow\n");
336
337 if (status & F_RSPQDISABLED) {
338 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
339
340 CH_ALERT(adapter,
341 "packet delivered to disabled response queue (0x%x)\n",
342 (v >> S_RSPQ0DISABLED) & 0xff);
343 }
344
345 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
346 if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED))
347 t3_fatal_err(adapter);
348}
349
350void
351t3_sge_prep(adapter_t *adap, struct sge_params *p)
352{
353 int i;
354
355 /* XXX Does ETHER_ALIGN need to be accounted for here? */
356 p->max_pkt_size = MJUM16BYTES - sizeof(struct cpl_rx_data);
357
358 for (i = 0; i < SGE_QSETS; ++i) {
359 struct qset_params *q = p->qset + i;
360
361 q->polling = adap->params.rev > 0;
362
363 q->coalesce_nsecs = 5000;
364
365 q->rspq_size = RSPQ_Q_SIZE;
366 q->fl_size = FL_Q_SIZE;
367 q->jumbo_size = JUMBO_Q_SIZE;
368 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
369 q->txq_size[TXQ_OFLD] = 1024;
370 q->txq_size[TXQ_CTRL] = 256;
371 q->cong_thres = 0;
372 }
373}
374
375int
376t3_sge_alloc(adapter_t *sc)
377{
378
379 /* The parent tag. */
380 if (bus_dma_tag_create( NULL, /* parent */
381 1, 0, /* algnmnt, boundary */
382 BUS_SPACE_MAXADDR, /* lowaddr */
383 BUS_SPACE_MAXADDR, /* highaddr */
384 NULL, NULL, /* filter, filterarg */
385 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
386 BUS_SPACE_UNRESTRICTED, /* nsegments */
387 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
388 0, /* flags */
389 NULL, NULL, /* lock, lockarg */
390 &sc->parent_dmat)) {
391 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
392 return (ENOMEM);
393 }
394
395 /*
396 * DMA tag for normal sized RX frames
397 */
398 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
399 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
400 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
401 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
402 return (ENOMEM);
403 }
404
405 /*
406 * DMA tag for jumbo sized RX frames.
407 */
408 if (bus_dma_tag_create(sc->parent_dmat, MJUMPAGESIZE, 0, BUS_SPACE_MAXADDR,
409 BUS_SPACE_MAXADDR, NULL, NULL, MJUMPAGESIZE, 1, MJUMPAGESIZE,
410 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
411 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
412 return (ENOMEM);
413 }
414
415 /*
416 * DMA tag for TX frames.
417 */
418 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
419 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
420 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
421 NULL, NULL, &sc->tx_dmat)) {
422 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
423 return (ENOMEM);
424 }
425
426 return (0);
427}
428
429int
430t3_sge_free(struct adapter * sc)
431{
432
433 if (sc->tx_dmat != NULL)
434 bus_dma_tag_destroy(sc->tx_dmat);
435
436 if (sc->rx_jumbo_dmat != NULL)
437 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
438
439 if (sc->rx_dmat != NULL)
440 bus_dma_tag_destroy(sc->rx_dmat);
441
442 if (sc->parent_dmat != NULL)
443 bus_dma_tag_destroy(sc->parent_dmat);
444
445 return (0);
446}
447
448void
449t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
450{
451
452 qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U);
453 qs->rspq.polling = 0 /* p->polling */;
454}
455
456static void
457refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
458{
459 struct refill_fl_cb_arg *cb_arg = arg;
460
461 cb_arg->error = error;
462 cb_arg->seg = segs[0];
463 cb_arg->nseg = nseg;
464
465}
466
467/**
468 * refill_fl - refill an SGE free-buffer list
469 * @sc: the controller softc
470 * @q: the free-list to refill
471 * @n: the number of new buffers to allocate
472 *
473 * (Re)populate an SGE free-buffer list with up to @n new packet buffers.
474 * The caller must assure that @n does not exceed the queue's capacity.
475 */
476static void
477refill_fl(adapter_t *sc, struct sge_fl *q, int n)
478{
479 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
480 struct rx_desc *d = &q->desc[q->pidx];
481 struct refill_fl_cb_arg cb_arg;
482 void *cl;
483 int err;
484
485 cb_arg.error = 0;
486 while (n--) {
487 /*
488 * We only allocate a cluster, mbuf allocation happens after rx
489 */
490 if ((cl = m_cljget(NULL, M_DONTWAIT, q->buf_size)) == NULL) {
491 log(LOG_WARNING, "Failed to allocate cluster\n");
492 goto done;
493 }
494 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
495 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
496 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
497 uma_zfree(q->zone, cl);
498 goto done;
499 }
500 sd->flags |= RX_SW_DESC_MAP_CREATED;
501 }
502 err = bus_dmamap_load(q->entry_tag, sd->map, cl, q->buf_size,
503 refill_fl_cb, &cb_arg, 0);
504
505 if (err != 0 || cb_arg.error) {
506 log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
507 /*
508 * XXX free cluster
509 */
510 return;
511 }
512
513 sd->flags |= RX_SW_DESC_INUSE;
514 sd->cl = cl;
515 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
516 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
517 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
518 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
519
520 d++;
521 sd++;
522
523 if (++q->pidx == q->size) {
524 q->pidx = 0;
525 q->gen ^= 1;
526 sd = q->sdesc;
527 d = q->desc;
528 }
529 q->credits++;
530 }
531
532done:
533 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
534}
535
536
537/**
538 * free_rx_bufs - free the Rx buffers on an SGE free list
539 * @sc: the controller softc
540 * @q: the SGE free list to clean up
541 *
542 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
543 * this queue should be stopped before calling this function.
544 */
545static void
546free_rx_bufs(adapter_t *sc, struct sge_fl *q)
547{
548 u_int cidx = q->cidx;
549
550 while (q->credits--) {
551 struct rx_sw_desc *d = &q->sdesc[cidx];
552
553 if (d->flags & RX_SW_DESC_INUSE) {
554 bus_dmamap_unload(q->entry_tag, d->map);
555 bus_dmamap_destroy(q->entry_tag, d->map);
556 uma_zfree(q->zone, d->cl);
557 }
558 d->cl = NULL;
559 if (++cidx == q->size)
560 cidx = 0;
561 }
562}
563
564static __inline void
565__refill_fl(adapter_t *adap, struct sge_fl *fl)
566{
567 refill_fl(adap, fl, min(16U, fl->size - fl->credits));
568}
569
570/**
571 * recycle_rx_buf - recycle a receive buffer
572 * @adapter: the adapter
573 * @q: the SGE free list
574 * @idx: index of buffer to recycle
575 *
576 * Recycles the specified buffer on the given free list by adding it at
577 * the next available slot on the list.
578 */
579static void
580recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
581{
582 struct rx_desc *from = &q->desc[idx];
583 struct rx_desc *to = &q->desc[q->pidx];
584
585 q->sdesc[q->pidx] = q->sdesc[idx];
586 to->addr_lo = from->addr_lo; // already big endian
587 to->addr_hi = from->addr_hi; // likewise
588 wmb();
589 to->len_gen = htobe32(V_FLD_GEN1(q->gen));
590 to->gen2 = htobe32(V_FLD_GEN2(q->gen));
591 q->credits++;
592
593 if (++q->pidx == q->size) {
594 q->pidx = 0;
595 q->gen ^= 1;
596 }
597 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
598}
599
600static void
601alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
602{
603 uint32_t *addr;
604
605 addr = arg;
606 *addr = segs[0].ds_addr;
607}
608
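/**
 * alloc_ring - allocate a descriptor ring and its associated SW state
 * @sc: the controller softc
 * @nelem: the number of descriptors in the ring
 * @elem_size: the size of each HW descriptor
 * @sw_size: the size of the per-descriptor SW state, or 0 if none is needed
 *
 * Allocates and DMA-loads a zeroed descriptor ring of @nelem entries,
 * optionally allocates a parallel array of SW descriptors, and, when a
 * parent entry tag is supplied, creates the per-entry DMA tag used to map
 * individual buffers.
 */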
609static int
610alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
611 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
612 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
613{
614 size_t len = nelem * elem_size;
615 void *s = NULL;
616 void *p = NULL;
617 int err;
618
619 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
620 BUS_SPACE_MAXADDR_32BIT,
621 BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
622 len, 0, NULL, NULL, tag)) != 0) {
623 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
624 return (ENOMEM);
625 }
626
627 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
628 map)) != 0) {
629 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
630 return (ENOMEM);
631 }
632
633 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
634 bzero(p, len);
635 *(void **)desc = p;
636
637 if (sw_size) {
638 len = nelem * sw_size;
639 s = malloc(len, M_DEVBUF, M_WAITOK);
640 bzero(s, len);
641 *(void **)sdesc = s;
642 }
643 if (parent_entry_tag == NULL)
644 return (0);
645
646 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
647 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
648 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
649 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
650 NULL, NULL, entry_tag)) != 0) {
651 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
652 return (ENOMEM);
653 }
654 return (0);
655}
656
657static void
658sge_slow_intr_handler(void *arg, int ncount)
659{
660 adapter_t *sc = arg;
661
662 t3_slow_intr_handler(sc);
663}
664
665static void
666sge_timer_cb(void *arg)
667{
668 adapter_t *sc = arg;
669 struct port_info *p;
670 struct sge_qset *qs;
671 struct sge_txq *txq;
672 int i, j;
673 int reclaim_eth, reclaim_ofl, refill_rx;
674
675 for (i = 0; i < sc->params.nports; i++)
676 for (j = 0; j < sc->port[i].nqsets; j++) {
677 qs = &sc->sge.qs[i + j];
678 txq = &qs->txq[0];
679 reclaim_eth = txq[TXQ_ETH].processed - txq[TXQ_ETH].cleaned;
680 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
681 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
682 (qs->fl[1].credits < qs->fl[1].size));
683 if (reclaim_eth || reclaim_ofl || refill_rx) {
684 p = &sc->port[i];
685 taskqueue_enqueue(p->tq, &p->timer_reclaim_task);
686 break;
687 }
688 }
689 if (sc->open_device_map != 0)
690 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
691}
692
693/*
694 * This is meant to be a catch-all function to keep sge state private
695 * to sge.c
696 *
697 */
698int
699t3_sge_init_adapter(adapter_t *sc)
700{
701 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
702 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
703 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
704 return (0);
705}
706
707int
708t3_sge_init_port(struct port_info *p)
709{
710 TASK_INIT(&p->timer_reclaim_task, 0, sge_timer_reclaim, p);
711 return (0);
712}
713
714void
715t3_sge_deinit_sw(adapter_t *sc)
716{
717 int i;
718
719 callout_drain(&sc->sge_timer_ch);
720 if (sc->tq)
721 taskqueue_drain(sc->tq, &sc->slow_intr_task);
722 for (i = 0; i < sc->params.nports; i++)
723 if (sc->port[i].tq != NULL)
724 taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task);
725}
726
727/**
728 * refill_rspq - replenish an SGE response queue
729 * @adapter: the adapter
730 * @q: the response queue to replenish
731 * @credits: how many new responses to make available
732 *
733 * Replenishes a response queue by making the supplied number of responses
734 * available to HW.
735 */
736static __inline void
737refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
738{
739
740 /* mbufs are allocated on demand when a rspq entry is processed. */
741 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
742 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
743}
744
745
746static void
747sge_timer_reclaim(void *arg, int ncount)
748{
749 struct port_info *p = arg;
750 int i, nqsets = p->nqsets;
751 adapter_t *sc = p->adapter;
752 struct sge_qset *qs;
753 struct sge_txq *txq;
754 struct mtx *lock;
755 struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
756 int n, reclaimable;
757
758 for (i = 0; i < nqsets; i++) {
759 qs = &sc->sge.qs[i];
760 txq = &qs->txq[TXQ_ETH];
761 reclaimable = desc_reclaimable(txq);
762 if (reclaimable > 0) {
763 mtx_lock(&txq->lock);
764 n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec);
765 mtx_unlock(&txq->lock);
766
767 for (i = 0; i < n; i++)
768 m_freem_vec(m_vec[i]);
769
770 if (p->ifp->if_drv_flags & IFF_DRV_OACTIVE &&
771 txq->size - txq->in_use >= TX_START_MAX_DESC) {
772 p->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
773 taskqueue_enqueue(p->tq, &p->start_task);
774 }
775 }
776
777 txq = &qs->txq[TXQ_OFLD];
778 reclaimable = desc_reclaimable(txq);
779 if (reclaimable > 0) {
780 mtx_lock(&txq->lock);
781 n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec);
782 mtx_unlock(&txq->lock);
783
784 for (i = 0; i < n; i++)
785 m_freem_vec(m_vec[i]);
786 }
787
788 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
789 &sc->sge.qs[0].rspq.lock;
790
791 if (mtx_trylock(lock)) {
792 /* XXX currently assume that we are *NOT* polling */
793 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
794
795 if (qs->fl[0].credits < qs->fl[0].size - 16)
796 __refill_fl(sc, &qs->fl[0]);
797 if (qs->fl[1].credits < qs->fl[1].size - 16)
798 __refill_fl(sc, &qs->fl[1]);
799
800 if (status & (1 << qs->rspq.cntxt_id)) {
801 if (qs->rspq.credits) {
802 refill_rspq(sc, &qs->rspq, 1);
803 qs->rspq.credits--;
804 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
805 1 << qs->rspq.cntxt_id);
806 }
807 }
808 mtx_unlock(lock);
809 }
810 }
811}
812
813/**
814 * init_qset_cntxt - initialize an SGE queue set context info
815 * @qs: the queue set
816 * @id: the queue set id
817 *
818 * Initializes the TIDs and context ids for the queues of a queue set.
819 */
820static void
821init_qset_cntxt(struct sge_qset *qs, u_int id)
822{
823
824 qs->rspq.cntxt_id = id;
825 qs->fl[0].cntxt_id = 2 * id;
826 qs->fl[1].cntxt_id = 2 * id + 1;
827 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
828 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
829 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
830 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
831 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
832}
833
834
835static void
836txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
837{
838 txq->in_use += ndesc;
839 /*
840 * XXX we don't handle stopping of queue
841 * presumably start handles this when we bump against the end
842 */
843 txqs->gen = txq->gen;
844 txq->unacked += ndesc;
845 txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3);
846 txq->unacked &= 7;
847 txqs->pidx = txq->pidx;
848 txq->pidx += ndesc;
849
850 if (txq->pidx >= txq->size) {
851 txq->pidx -= txq->size;
852 txq->gen ^= 1;
853 }
854
855}
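/*
 * Note that txqs->compl requests a completion roughly once every 8
 * descriptors: the running unacked count accumulates, bit 3 is shifted into
 * the WR completion flag position, and the remainder (unacked & 7) carries
 * over to the next call.
 */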
856
857/**
858 * calc_tx_descs - calculate the number of Tx descriptors for a packet
859 * @m: the packet mbufs
860 * @nsegs: the number of segments
861 *
862 * Returns the number of Tx descriptors needed for the given Ethernet
863 * packet. Ethernet packets require addition of WR and CPL headers.
864 */
865static __inline unsigned int
866calc_tx_descs(const struct mbuf *m, int nsegs)
867{
868 unsigned int flits;
869
870 if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
871 return 1;
872
873 flits = sgl_len(nsegs) + 2;
874#ifdef TSO_SUPPORTED
875 if (m->m_pkthdr.csum_flags & (CSUM_TSO))
876 flits++;
877#endif
878 return flits_to_desc(flits);
879}
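/*
 * Example: a packet too large for immediate data that maps to 3 DMA segments
 * needs sgl_len(3) + 2 = 7 flits, which flits_to_desc() folds into a single
 * Tx descriptor.
 */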
880
881static unsigned int
882busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
883 struct tx_sw_desc *stx, bus_dma_segment_t *segs, int *nsegs)
884{
885 struct mbuf *m0;
886 int err, pktlen;
887
888 m0 = *m;
889 pktlen = m0->m_pkthdr.len;
890
891 err = bus_dmamap_load_mvec_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0);
892#ifdef DEBUG
893 if (err) {
894 int n = 0;
895 struct mbuf *mtmp = m0;
896 while(mtmp) {
897 n++;
898 mtmp = mtmp->m_next;
899 }
900 printf("map_mbufs: bus_dmamap_load_mbuf_sg failed with %d - pkthdr.len==%d nmbufs=%d\n",
901 err, m0->m_pkthdr.len, n);
902 }
903#endif
904 if (err == EFBIG) {
905 /* Too many segments, try to defrag */
906 m0 = m_defrag(m0, M_NOWAIT);
907 if (m0 == NULL) {
908 m_freem(*m);
909 *m = NULL;
910 return (ENOBUFS);
911 }
912 *m = m0;
913 err = bus_dmamap_load_mbuf_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0);
914 }
915
916 if (err == ENOMEM) {
917 return (err);
918 }
919
920 if (err) {
921 if (cxgb_debug)
922 printf("map failure err=%d pktlen=%d\n", err, pktlen);
923 m_freem_vec(m0);
924 *m = NULL;
925 return (err);
926 }
927
928 bus_dmamap_sync(txq->entry_tag, stx->map, BUS_DMASYNC_PREWRITE);
929 stx->flags |= TX_SW_DESC_MAPPED;
930
931 return (0);
932}
933
934/**
935 * make_sgl - populate a scatter/gather list for a packet
936 * @sgp: the SGL to populate
937 * @segs: the packet dma segments
938 * @nsegs: the number of segments
939 *
940 * Generates a scatter/gather list for the buffers that make up a packet
941 * and returns the SGL size in 8-byte words. The caller must size the SGL
942 * appropriately.
943 */
944static __inline void
945make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
946{
947 int i, idx;
948
949 for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) {
950 if (i && idx == 0)
951 ++sgp;
952
953 sgp->len[idx] = htobe32(segs[i].ds_len);
954 sgp->addr[idx] = htobe64(segs[i].ds_addr);
955 }
956
957 if (idx)
958 sgp->len[idx] = 0;
959}
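/*
 * Segments are packed two per sg_ent; with an odd count the unused length
 * slot of the final entry is zeroed rather than left uninitialized.
 */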
960
961/**
962 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
963 * @adap: the adapter
964 * @q: the Tx queue
965 *
966 * Ring the doorbell if a Tx queue is asleep. There is a natural race,
967 * where the HW may go to sleep just after we checked; in that case
968 * the interrupt handler will detect the outstanding TX packet
969 * and ring the doorbell for us.
970 *
971 * When GTS is disabled we unconditionally ring the doorbell.
972 */
973static __inline void
974check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
975{
976#if USE_GTS
977 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
978 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
979 set_bit(TXQ_LAST_PKT_DB, &q->flags);
980#ifdef T3_TRACE
981 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
982 q->cntxt_id);
983#endif
984 t3_write_reg(adap, A_SG_KDOORBELL,
985 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
986 }
987#else
988 wmb(); /* write descriptors before telling HW */
989 t3_write_reg(adap, A_SG_KDOORBELL,
990 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
991#endif
992}
993
994static __inline void
995wr_gen2(struct tx_desc *d, unsigned int gen)
996{
997#if SGE_NUM_GENBITS == 2
998 d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
999#endif
1000}
1001
1002
1003
1004/**
1005 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
1006 * @ndesc: number of Tx descriptors spanned by the SGL
1007 * @txd: first Tx descriptor to be written
1008 * @txqs: txq state (generation and producer index)
1009 * @txq: the SGE Tx queue
1010 * @sgl: the SGL
1011 * @flits: number of flits to the start of the SGL in the first descriptor
1012 * @sgl_flits: the SGL size in flits
1013 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
1014 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
1015 *
1016 * Write a work request header and an associated SGL. If the SGL is
1017 * small enough to fit into one Tx descriptor it has already been written
1018 * and we just need to write the WR header. Otherwise we distribute the
1019 * SGL across the number of descriptors it spans.
1020 */
1021
1022static void
1023write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1024 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1025 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1026{
1027
1028 struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1029 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
1030
1031 if (__predict_true(ndesc == 1)) {
1032 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1033 V_WR_SGLSFLT(flits)) | wr_hi;
1034 wmb();
1035 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1036 V_WR_GEN(txqs->gen)) | wr_lo;
1037 /* XXX gen? */
1038 wr_gen2(txd, txqs->gen);
1039 } else {
1040 unsigned int ogen = txqs->gen;
1041 const uint64_t *fp = (const uint64_t *)sgl;
1042 struct work_request_hdr *wp = wrp;
1043
1044 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1045 V_WR_SGLSFLT(flits)) | wr_hi;
1046
1047 while (sgl_flits) {
1048 unsigned int avail = WR_FLITS - flits;
1049
1050 if (avail > sgl_flits)
1051 avail = sgl_flits;
1052 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1053 sgl_flits -= avail;
1054 ndesc--;
1055 if (!sgl_flits)
1056 break;
1057
1058 fp += avail;
1059 txd++;
1060 txsd++;
1061 if (++txqs->pidx == txq->size) {
1062 txqs->pidx = 0;
1063 txqs->gen ^= 1;
1064 txd = txq->desc;
1065 txsd = txq->sdesc;
1066 }
1067
1068 /*
1069 * when the head of the mbuf chain
1070 * is freed all clusters will be freed
1071 * with it
1072 */
1073 txsd->m = NULL;
1074 wrp = (struct work_request_hdr *)txd;
1075 wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1076 V_WR_SGLSFLT(1)) | wr_hi;
1077 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1078 sgl_flits + 1)) |
1079 V_WR_GEN(txqs->gen)) | wr_lo;
1080 wr_gen2(txd, txqs->gen);
1081 flits = 1;
1082 }
1083 wrp->wr_hi |= htonl(F_WR_EOP);
1084 wmb();
1085 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1086 wr_gen2((struct tx_desc *)wp, ogen);
1087 }
1088}
1089
1090
1091/* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
1092#define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
1093
1094int
1095t3_encap(struct port_info *p, struct mbuf **m)
1096{
1097 adapter_t *sc;
1098 struct mbuf *m0;
1099 struct sge_qset *qs;
1100 struct sge_txq *txq;
1101 struct tx_sw_desc *stx;
1102 struct txq_state txqs;
1103 unsigned int nsegs, ndesc, flits, cntrl, mlen;
1104 int err, tso_info = 0;
1105
1106 struct work_request_hdr *wrp;
1107 struct tx_sw_desc *txsd;
1108 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
1109 bus_dma_segment_t segs[TX_MAX_SEGS];
1110 uint32_t wr_hi, wr_lo, sgl_flits;
1111
1112 struct tx_desc *txd;
1113 struct cpl_tx_pkt *cpl;
1114
1115 DPRINTF("t3_encap ");
1116 m0 = *m;
1117 sc = p->adapter;
1118 qs = &sc->sge.qs[p->first_qset];
1119 txq = &qs->txq[TXQ_ETH];
1120 stx = &txq->sdesc[txq->pidx];
1121 txd = &txq->desc[txq->pidx];
1122 cpl = (struct cpl_tx_pkt *)txd;
1123 mlen = m0->m_pkthdr.len;
1124 cpl->len = htonl(mlen | 0x80000000);
1125
1126 DPRINTF("mlen=%d\n", mlen);
1127 /*
1128 * XXX handle checksum, TSO, and VLAN here
1129 *
1130 */
1131 cntrl = V_TXPKT_INTF(p->port);
1132
1133 /*
1134 * XXX need to add VLAN support for 6.x
1135 */
1136#ifdef VLAN_SUPPORTED
1137 if (m0->m_flags & M_VLANTAG)
1138 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
1139 if (m0->m_pkthdr.csum_flags & (CSUM_TSO))
1140 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1141#endif
1142 if (tso_info) {
1143 int eth_type;
1144 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *) cpl;
1145 struct ip *ip;
1146 struct tcphdr *tcp;
1147 uint8_t *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack? */
1148
1149 txd->flit[2] = 0;
1150 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1151 hdr->cntrl = htonl(cntrl);
1152
1153 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
1154 pkthdr = &tmp[0];
1155 m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr);
1156 } else {
1157 pkthdr = mtod(m0, uint8_t *);
1158 }
1159
1160 if (__predict_false(m0->m_flags & M_VLANTAG)) {
1161 eth_type = CPL_ETH_II_VLAN;
1162 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
1163 ETHER_VLAN_ENCAP_LEN);
1164 } else {
1165 eth_type = CPL_ETH_II;
1166 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
1167 }
1168 tcp = (struct tcphdr *)((uint8_t *)ip +
1169 sizeof(*ip));
1170
1171 tso_info |= V_LSO_ETH_TYPE(eth_type) |
1172 V_LSO_IPHDR_WORDS(ip->ip_hl) |
1173 V_LSO_TCPHDR_WORDS(tcp->th_off);
1174 hdr->lso_info = htonl(tso_info);
1175 flits = 3;
1176 } else {
1177 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1178 cpl->cntrl = htonl(cntrl);
1179
1180 if (mlen <= WR_LEN - sizeof(*cpl)) {
1181 txq_prod(txq, 1, &txqs);
1182 txq->sdesc[txqs.pidx].m = m0;
1183 m_set_priority(m0, txqs.pidx);
1184
1185 if (m0->m_len == m0->m_pkthdr.len)
1186 memcpy(&txd->flit[2], mtod(m0, uint8_t *), mlen);
1187 else
1188 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1189
1190 flits = (mlen + 7) / 8 + 2;
1191 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1192 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1193 F_WR_SOP | F_WR_EOP | txqs.compl);
1194 wmb();
1195 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
1196 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1197
1198 wr_gen2(txd, txqs.gen);
1199 check_ring_tx_db(sc, txq);
1200 return (0);
1201 }
1202 flits = 2;
1203 }
1204
1205 wrp = (struct work_request_hdr *)txd;
1206
1207 if ((err = busdma_map_mbufs(m, txq, stx, segs, &nsegs)) != 0) {
1208 return (err);
1209 }
1210 m0 = *m;
1211 ndesc = calc_tx_descs(m0, nsegs);
1212
1213 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1214 make_sgl(sgp, segs, nsegs);
1215
1216 sgl_flits = sgl_len(nsegs);
1217
1218 DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
1219 txq_prod(txq, ndesc, &txqs);
1220 txsd = &txq->sdesc[txqs.pidx];
1221 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1222 wr_lo = htonl(V_WR_TID(txq->token));
1223 txsd->m = m0;
1224 m_set_priority(m0, txqs.pidx);
1225
1226 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
1227 check_ring_tx_db(p->adapter, txq);
1228
1229 return (0);
1230}
1231
1232
1233/**
1234 * write_imm - write a packet into a Tx descriptor as immediate data
1235 * @d: the Tx descriptor to write
1236 * @m: the packet
1237 * @len: the length of packet data to write as immediate data
1238 * @gen: the generation bit value to write
1239 *
1240 * Writes a packet as immediate data into a Tx descriptor. The packet
1241 * contains a work request at its beginning. We must write the packet
1242 * carefully so the SGE doesn't accidentally read it before it has been
1243 * written in its entirety.
1244 */
1245static __inline void
1246write_imm(struct tx_desc *d, struct mbuf *m,
1247 unsigned int len, unsigned int gen)
1248{
1249 struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
1250 struct work_request_hdr *to = (struct work_request_hdr *)d;
1251
1252 memcpy(&to[1], &from[1], len - sizeof(*from));
1253 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1254 V_WR_BCNTLFLT(len & 7));
1255 wmb();
1256 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1257 V_WR_LEN((len + 7) / 8));
1258 wr_gen2(d, gen);
1259 m_freem(m);
1260}
1261
1262/**
1263 * check_desc_avail - check descriptor availability on a send queue
1264 * @adap: the adapter
1265 * @q: the TX queue
1266 * @m: the packet needing the descriptors
1267 * @ndesc: the number of Tx descriptors needed
1268 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1269 *
1270 * Checks if the requested number of Tx descriptors is available on an
1271 * SGE send queue. If the queue is already suspended or not enough
1272 * descriptors are available the packet is queued for later transmission.
1273 * Must be called with the Tx queue locked.
1274 *
1275 * Returns 0 if enough descriptors are available, 1 if there aren't
1276 * enough descriptors and the packet has been queued, and 2 if the caller
1277 * needs to retry because there weren't enough descriptors at the
1278 * beginning of the call but some freed up in the mean time.
1279 */
1280static __inline int
1281check_desc_avail(adapter_t *adap, struct sge_txq *q,
1282 struct mbuf *m, unsigned int ndesc,
1283 unsigned int qid)
1284{
1285 /*
1286 * XXX We currently only use this for checking the control queue;
1287 * the control queue is only used for binding qsets, which happens
1288 * at init time, so we are guaranteed enough descriptors.
1289 */
1290 if (__predict_false(!mbufq_empty(&q->sendq))) {
1291addq_exit: mbufq_tail(&q->sendq, m);
1292 return 1;
1293 }
1294 if (__predict_false(q->size - q->in_use < ndesc)) {
1295
1296 struct sge_qset *qs = txq_to_qset(q, qid);
1297
1298 setbit(&qs->txq_stopped, qid);
1299 smp_mb();
1300
1301 if (should_restart_tx(q) &&
1302 test_and_clear_bit(qid, &qs->txq_stopped))
1303 return 2;
1304
1305 q->stops++;
1306 goto addq_exit;
1307 }
1308 return 0;
1309}
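/*
 * Callers follow the same retry pattern (see ctrl_xmit() and ofld_xmit()):
 * a return of 1 means the mbuf now sits on the sendq and will be sent when
 * the queue restarts, while 2 sends the caller back to its reclaim label:
 *
 * again:	reclaim_completed_tx_imm(q);
 *	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
 *	if (ret == 2)
 *		goto again;
 */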
1310
1311
1312/**
1313 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1314 * @q: the SGE control Tx queue
1315 *
1316 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1317 * that send only immediate data (presently just the control queues) and
1318 * thus do not have any mbufs.
1319 */
1320static __inline void
1321reclaim_completed_tx_imm(struct sge_txq *q)
1322{
1323 unsigned int reclaim = q->processed - q->cleaned;
1324
1325 mtx_assert(&q->lock, MA_OWNED);
1326
1327 q->in_use -= reclaim;
1328 q->cleaned += reclaim;
1329}
1330
1331static __inline int
1332immediate(const struct mbuf *m)
1333{
1334 return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ;
1335}
1336
1337/**
1338 * ctrl_xmit - send a packet through an SGE control Tx queue
1339 * @adap: the adapter
1340 * @q: the control queue
1341 * @m: the packet
1342 *
1343 * Send a packet through an SGE control Tx queue. Packets sent through
1344 * a control queue must fit entirely as immediate data in a single Tx
1345 * descriptor and have no page fragments.
1346 */
1347static int
1348ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1349{
1350 int ret;
1351 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1352
1353 if (__predict_false(!immediate(m))) {
1354 m_freem(m);
1355 return 0;
1356 }
1357
1358 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1359 wrp->wr_lo = htonl(V_WR_TID(q->token));
1360
1361 mtx_lock(&q->lock);
1362again: reclaim_completed_tx_imm(q);
1363
1364 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1365 if (__predict_false(ret)) {
1366 if (ret == 1) {
1367 mtx_unlock(&q->lock);
1368 return (-1);
1369 }
1370 goto again;
1371 }
1372
1373 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1374
1375 q->in_use++;
1376 if (++q->pidx >= q->size) {
1377 q->pidx = 0;
1378 q->gen ^= 1;
1379 }
1380 mtx_unlock(&q->lock);
1381 wmb();
1382 t3_write_reg(adap, A_SG_KDOORBELL,
1383 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1384 return (0);
1385}
1386
1387
1388/**
1389 * restart_ctrlq - restart a suspended control queue
1390 * @qs: the queue set containing the control queue
1391 *
1392 * Resumes transmission on a suspended Tx control queue.
1393 */
1394static void
1395restart_ctrlq(void *data, int npending)
1396{
1397 struct mbuf *m;
1398 struct sge_qset *qs = (struct sge_qset *)data;
1399 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1400 adapter_t *adap = qs->port->adapter;
1401
1402 mtx_lock(&q->lock);
1403again: reclaim_completed_tx_imm(q);
1404
1405 while (q->in_use < q->size &&
1406 (m = mbufq_dequeue(&q->sendq)) != NULL) {
1407
1408 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1409
1410 if (++q->pidx >= q->size) {
1411 q->pidx = 0;
1412 q->gen ^= 1;
1413 }
1414 q->in_use++;
1415 }
1416 if (!mbufq_empty(&q->sendq)) {
1417 setbit(&qs->txq_stopped, TXQ_CTRL);
1418 smp_mb();
1419
1420 if (should_restart_tx(q) &&
1421 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1422 goto again;
1423 q->stops++;
1424 }
1425 mtx_unlock(&q->lock);
1426 t3_write_reg(adap, A_SG_KDOORBELL,
1427 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1428}
1429
1430
1431/*
1432 * Send a management message through control queue 0
1433 */
1434int
1435t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1436{
1437 return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
1438}
1439
1440/**
1441 * t3_free_qset - free the resources of an SGE queue set
1442 * @sc: the controller owning the queue set
1443 * @q: the queue set
1444 *
1445 * Release the HW and SW resources associated with an SGE queue set, such
1446 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
1447 * queue set must be quiesced prior to calling this.
1448 */
1449static void
1450t3_free_qset(adapter_t *sc, struct sge_qset *q)
1451{
1452 int i;
1453
1454 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
1455 if (q->fl[i].desc) {
1456 mtx_lock(&sc->sge.reg_lock);
1457 t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
1458 mtx_unlock(&sc->sge.reg_lock);
1459 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
1460 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
1461 q->fl[i].desc_map);
1462 bus_dma_tag_destroy(q->fl[i].desc_tag);
1463 bus_dma_tag_destroy(q->fl[i].entry_tag);
1464 }
1465 if (q->fl[i].sdesc) {
1466 free_rx_bufs(sc, &q->fl[i]);
1467 free(q->fl[i].sdesc, M_DEVBUF);
1468 }
1469 }
1470
1471 for (i = 0; i < SGE_TXQ_PER_SET; i++) {
1472 if (q->txq[i].desc) {
1473 mtx_lock(&sc->sge.reg_lock);
1474 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
1475 mtx_unlock(&sc->sge.reg_lock);
1476 bus_dmamap_unload(q->txq[i].desc_tag,
1477 q->txq[i].desc_map);
1478 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
1479 q->txq[i].desc_map);
1480 bus_dma_tag_destroy(q->txq[i].desc_tag);
1481 bus_dma_tag_destroy(q->txq[i].entry_tag);
1482 MTX_DESTROY(&q->txq[i].lock);
1483 }
1484 if (q->txq[i].sdesc) {
1485 free(q->txq[i].sdesc, M_DEVBUF);
1486 }
1487 }
1488
1489 if (q->rspq.desc) {
1490 mtx_lock(&sc->sge.reg_lock);
1491 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
1492 mtx_unlock(&sc->sge.reg_lock);
1493
1494 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
1495 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
1496 q->rspq.desc_map);
1497 bus_dma_tag_destroy(q->rspq.desc_tag);
1498 MTX_DESTROY(&q->rspq.lock);
1499 }
1500
1501 bzero(q, sizeof(*q));
1502}
1503
1504/**
1505 * t3_free_sge_resources - free SGE resources
1506 * @sc: the adapter softc
1507 *
1508 * Frees resources used by the SGE queue sets.
1509 */
1510void
1511t3_free_sge_resources(adapter_t *sc)
1512{
1513 int i, nqsets;
1514
1515 for (nqsets = i = 0; i < (sc)->params.nports; i++)
1516 nqsets += sc->port[i].nqsets;
1517
1518 for (i = 0; i < nqsets; ++i)
1519 t3_free_qset(sc, &sc->sge.qs[i]);
1520}
1521
1522/**
1523 * t3_sge_start - enable SGE
1524 * @sc: the controller softc
1525 *
1526 * Enables the SGE for DMAs. This is the last step in starting packet
1527 * transfers.
1528 */
1529void
1530t3_sge_start(adapter_t *sc)
1531{
1532 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
1533}
1534
1535/**
1536 * t3_sge_stop - disable SGE operation
1537 * @sc: the adapter
1538 *
1539 * Disables the DMA engine. This can be called in emergencies (e.g.,
1540 * from error interrupts) or from normal process context. In the latter
1541 * case it also disables any pending queue restart tasklets. Note that
1542 * if it is called in interrupt context it cannot disable the restart
1543 * tasklets as it cannot wait, however the tasklets will have no effect
1544 * since the doorbells are disabled and the driver will call this again
1545 * later from process context, at which time the tasklets will be stopped
1546 * if they are still running.
1547 */
1548void
1549t3_sge_stop(adapter_t *sc)
1550{
1551 int i, nqsets;
1552
1553 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
1554
1555 if (sc->tq == NULL)
1556 return;
1557
1558 for (nqsets = i = 0; i < (sc)->params.nports; i++)
1559 nqsets += sc->port[i].nqsets;
1560
1561 for (i = 0; i < nqsets; ++i) {
1562 struct sge_qset *qs = &sc->sge.qs[i];
1563
1564 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_tsk);
1565 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_tsk);
1566 }
1567}
1568
1569
1570/**
1571 * free_tx_desc - reclaims Tx descriptors and their buffers
1572 * @adapter: the adapter
1573 * @q: the Tx queue to reclaim descriptors from
1574 * @n: the number of descriptors to reclaim
1575 *
1576 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
1577 * Tx buffers. Called with the Tx queue lock held.
1578 */
1579int
1580free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec)
1581{
1582 struct tx_sw_desc *d;
1583 unsigned int cidx = q->cidx;
1584 int nbufs = 0;
1585
1586#ifdef T3_TRACE
1587 T3_TRACE2(sc->tb[q->cntxt_id & 7],
1588 "reclaiming %u Tx descriptors at cidx %u", n, cidx);
1589#endif
1590 d = &q->sdesc[cidx];
1591
1592 while (n-- > 0) {
1593 DPRINTF("cidx=%d d=%p\n", cidx, d);
1594 if (d->m) {
1595 if (d->flags & TX_SW_DESC_MAPPED) {
1596 bus_dmamap_unload(q->entry_tag, d->map);
1597 bus_dmamap_destroy(q->entry_tag, d->map);
1598 d->flags &= ~TX_SW_DESC_MAPPED;
1599 }
1600 if (m_get_priority(d->m) == cidx) {
1601 m_vec[nbufs] = d->m;
1602 d->m = NULL;
1603 nbufs++;
1604 } else {
1605 printf("pri=%d cidx=%d\n", (int)m_get_priority(d->m), cidx);
1606 }
1607 }
1608 ++d;
1609 if (++cidx == q->size) {
1610 cidx = 0;
1611 d = q->sdesc;
1612 }
1613 }
1614 q->cidx = cidx;
1615
1616 return (nbufs);
1617}
1618
1619/**
1620 * is_new_response - check if a response is newly written
1621 * @r: the response descriptor
1622 * @q: the response queue
1623 *
1624 * Returns true if a response descriptor contains a yet unprocessed
1625 * response.
1626 */
1627static __inline int
1628is_new_response(const struct rsp_desc *r,
1629 const struct sge_rspq *q)
1630{
1631 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
1632}
1633
1634#define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1635#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1636 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
1637 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
1638 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
1639
1640/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
1641#define NOMEM_INTR_DELAY 2500
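/* i.e. 2500 * 0.1us = 250us */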
1642
1643/**
1644 * write_ofld_wr - write an offload work request
1645 * @adap: the adapter
1646 * @m: the packet to send
1647 * @q: the Tx queue
1648 * @pidx: index of the first Tx descriptor to write
1649 * @gen: the generation value to use
1650 * @ndesc: number of descriptors the packet will occupy
1651 *
1652 * Write an offload work request to send the supplied packet. The packet
1653 * data already carry the work request with most fields populated.
1654 */
1655static void
1656write_ofld_wr(adapter_t *adap, struct mbuf *m,
1657 struct sge_txq *q, unsigned int pidx,
1658 unsigned int gen, unsigned int ndesc,
1659 bus_dma_segment_t *segs, unsigned int nsegs)
1660{
1661 unsigned int sgl_flits, flits;
1662 struct work_request_hdr *from;
1663 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
1664 struct tx_desc *d = &q->desc[pidx];
1665 struct txq_state txqs;
1666
1667 if (immediate(m)) {
1668 q->sdesc[pidx].m = NULL;
1669 write_imm(d, m, m->m_len, gen);
1670 return;
1671 }
1672
1673 /* Only TX_DATA builds SGLs */
1674
1675 from = mtod(m, struct work_request_hdr *);
1676 memcpy(&d->flit[1], &from[1],
1677 (uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *) - sizeof(*from));
1678
1679 flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8;
1680 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
1681
1682 make_sgl(sgp, segs, nsegs);
1683 sgl_flits = sgl_len(nsegs);
1684
1685 txqs.gen = q->gen;
1686 txqs.pidx = q->pidx;
1687 txqs.compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1688 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
1689 from->wr_hi, from->wr_lo);
1690}
1691
1692/**
1693 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1694 * @m: the packet
1695 *
1696 * Returns the number of Tx descriptors needed for the given offload
1697 * packet. These packets are already fully constructed.
1698 */
1699static __inline unsigned int
1700calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
1701{
1702 unsigned int flits, cnt = 0;
1703
1704
1705 if (m->m_len <= WR_LEN)
1706 return 1; /* packet fits as immediate data */
1707
1708 if (m->m_flags & M_IOVEC)
1709 cnt = mtomv(m)->mv_count;
1710
1711 flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8; /* headers */
1712
1713 return flits_to_desc(flits + sgl_len(cnt));
1714}
1715
1716/**
1717 * ofld_xmit - send a packet through an offload queue
1718 * @adap: the adapter
1719 * @q: the Tx offload queue
1720 * @m: the packet
1721 *
1722 * Send an offload packet through an SGE offload queue.
1723 */
1724static int
1725ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1726{
1727 int ret;
1728 unsigned int pidx, gen, nsegs;
1729 unsigned int ndesc;
1730 struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
1731 bus_dma_segment_t segs[TX_MAX_SEGS];
1732 int i, cleaned;
1733 struct tx_sw_desc *stx = &q->sdesc[q->pidx];
1734
1735 mtx_lock(&q->lock);
1736 if ((ret = busdma_map_mbufs(&m, q, stx, segs, &nsegs)) != 0) {
1737 mtx_unlock(&q->lock);
1738 return (ret);
1739 }
1740 ndesc = calc_tx_descs_ofld(m, nsegs);
1741again: cleaned = reclaim_completed_tx(adap, q, TX_CLEAN_MAX_DESC, m_vec);
1742
1743 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
1744 if (__predict_false(ret)) {
1745 if (ret == 1) {
1746 m_set_priority(m, ndesc); /* save for restart */
1747 mtx_unlock(&q->lock);
1748 return NET_XMIT_CN;
1749 }
1750 goto again;
1751 }
1752
1753 gen = q->gen;
1754 q->in_use += ndesc;
1755 pidx = q->pidx;
1756 q->pidx += ndesc;
1757 if (q->pidx >= q->size) {
1758 q->pidx -= q->size;
1759 q->gen ^= 1;
1760 }
1761#ifdef T3_TRACE
1762 T3_TRACE5(adap->tb[q->cntxt_id & 7],
1763 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
1764 ndesc, pidx, skb->len, skb->len - skb->data_len,
1765 skb_shinfo(skb)->nr_frags);
1766#endif
1767 mtx_unlock(&q->lock);
1768
1769 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
1770 check_ring_tx_db(adap, q);
1771
1772 for (i = 0; i < cleaned; i++) {
1773 m_freem_vec(m_vec[i]);
1774 }
1775 return NET_XMIT_SUCCESS;
1776}
1777
1778/**
1779 * restart_offloadq - restart a suspended offload queue
1780 * @qs: the queue set containing the offload queue
1781 *
1782 * Resumes transmission on a suspended Tx offload queue.
1783 */
1784static void
1785restart_offloadq(void *data, int npending)
1786{
1787
1788 struct mbuf *m;
1789 struct sge_qset *qs = data;
1790 struct sge_txq *q = &qs->txq[TXQ_OFLD];
1791 adapter_t *adap = qs->port->adapter;
1792 struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
1793 bus_dma_segment_t segs[TX_MAX_SEGS];
1794 int nsegs, i, cleaned;
1795 struct tx_sw_desc *stx = &q->sdesc[q->pidx];
1796
1797 mtx_lock(&q->lock);
1798again: cleaned = reclaim_completed_tx(adap, q, TX_CLEAN_MAX_DESC, m_vec);
1799
1800 while ((m = mbufq_peek(&q->sendq)) != NULL) {
1801 unsigned int gen, pidx;
1802 unsigned int ndesc = m_get_priority(m);
1803
1804 if (__predict_false(q->size - q->in_use < ndesc)) {
1805 setbit(&qs->txq_stopped, TXQ_OFLD);
1806 smp_mb();
1807
1808 if (should_restart_tx(q) &&
1809 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1810 goto again;
1811 q->stops++;
1812 break;
1813 }
1814
1815 gen = q->gen;
1816 q->in_use += ndesc;
1817 pidx = q->pidx;
1818 q->pidx += ndesc;
1819 if (q->pidx >= q->size) {
1820 q->pidx -= q->size;
1821 q->gen ^= 1;
1822 }
1823
1824 (void)mbufq_dequeue(&q->sendq);
1825 busdma_map_mbufs(&m, q, stx, segs, &nsegs);
1826 mtx_unlock(&q->lock);
1827 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
1828 mtx_lock(&q->lock);
1829 }
1830 mtx_unlock(&q->lock);
1831
1832#if USE_GTS
1833 set_bit(TXQ_RUNNING, &q->flags);
1834 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1835#endif
1836 t3_write_reg(adap, A_SG_KDOORBELL,
1837 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1838
1839 for (i = 0; i < cleaned; i++) {
1840 m_freem_vec(m_vec[i]);
1841 }
1842}
1843
1844/**
1845 * queue_set - return the queue set a packet should use
1846 * @m: the packet
1847 *
1848 * Maps a packet to the SGE queue set it should use. The desired queue
1849 * set is carried in bits 1-3 in the packet's priority.
1850 */
1851static __inline int
1852queue_set(const struct mbuf *m)
1853{
1854 return m_get_priority(m) >> 1;
1855}
1856
1857/**
1858 * is_ctrl_pkt - return whether an offload packet is a control packet
1859 * @m: the packet
1860 *
1861 * Determines whether an offload packet should use an OFLD or a CTRL
1862 * Tx queue. This is indicated by bit 0 in the packet's priority.
1863 */
1864static __inline int
1865is_ctrl_pkt(const struct mbuf *m)
1866{
1867 return m_get_priority(m) & 1;
1868}
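/*
 * Example: an offload mbuf with priority 5 (binary 101) is a control packet
 * (bit 0 set) destined for queue set 2 (5 >> 1).
 */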
1869
1870/**
1871 * t3_offload_tx - send an offload packet
1872 * @tdev: the offload device to send to
1873 * @m: the packet
1874 *
1875 * Sends an offload packet. We use the packet priority to select the
1876 * appropriate Tx queue as follows: bit 0 indicates whether the packet
1877 * should be sent as regular or control, bits 1-3 select the queue set.
1878 */
1879int
1880t3_offload_tx(struct toedev *tdev, struct mbuf *m)
1881{
1882 adapter_t *adap = tdev2adap(tdev);
1883 struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
1884
1885 if (__predict_false(is_ctrl_pkt(m)))
1886 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m);
1887
1888 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m);
1889}
1890
1891/**
1892 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1893 * @tdev: the offload device that will be receiving the packets
1894 * @q: the SGE response queue that assembled the bundle
1895 * @m: the partial bundle
1896 * @n: the number of packets in the bundle
1897 *
1898 * Delivers a (partial) bundle of Rx offload packets to an offload device.
1899 */
1900static __inline void
1901deliver_partial_bundle(struct toedev *tdev,
1902 struct sge_rspq *q,
1903 struct mbuf *mbufs[], int n)
1904{
1905 if (n) {
1906 q->offload_bundles++;
1907 cxgb_ofld_recv(tdev, mbufs, n);
1908 }
1909}
1910
1911static __inline int
1912rx_offload(struct toedev *tdev, struct sge_rspq *rq,
1913 struct mbuf *m, struct mbuf *rx_gather[],
1914 unsigned int gather_idx)
1915{
1916 rq->offload_pkts++;
1917 m->m_pkthdr.header = mtod(m, void *);
1918
1919 rx_gather[gather_idx++] = m;
1920 if (gather_idx == RX_BUNDLE_SIZE) {
1921 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1922 gather_idx = 0;
1923 rq->offload_bundles++;
1924 }
1925 return (gather_idx);
1926}
1927
1928static void
1929restart_tx(struct sge_qset *qs)
1930{
1931 struct adapter *sc = qs->port->adapter;
1932
1933 if (isset(&qs->txq_stopped, TXQ_OFLD) &&
1934 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
1935 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
1936 qs->txq[TXQ_OFLD].restarts++;
1937 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_tsk);
1938 }
1939 if (isset(&qs->txq_stopped, TXQ_CTRL) &&
1940 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
1941 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
1942 qs->txq[TXQ_CTRL].restarts++;
1943 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_tsk);
1944 }
1945}
1946
1947/**
1948 * t3_sge_alloc_qset - initialize an SGE queue set
1949 * @sc: the controller softc
1950 * @id: the queue set id
1951 * @nports: how many Ethernet ports will be using this queue set
1952 * @irq_vec_idx: the IRQ vector index for response queue interrupts
1953 * @p: configuration parameters for this queue set
1954 * @ntxq: number of Tx queues for the queue set
1955 * @pi: port info for queue set
1956 *
1957 * Allocate resources and initialize an SGE queue set. A queue set
1958 * comprises a response queue, two Rx free-buffer queues, and up to 3
1959 * Tx queues. The Tx queues are assigned roles in the order Ethernet
1960 * queue, offload queue, and control queue.
1961 */
1962int
1963t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
1964 const struct qset_params *p, int ntxq, struct port_info *pi)
1965{
1966 struct sge_qset *q = &sc->sge.qs[id];
1967 int i, ret = 0;
1968
1969 init_qset_cntxt(q, id);
1970
1971 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
1972 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
1973 &q->fl[0].desc, &q->fl[0].sdesc,
1974 &q->fl[0].desc_tag, &q->fl[0].desc_map,
1975 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
1976 printf("error %d from alloc ring fl0\n", ret);
1977 goto err;
1978 }
1979
1980 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
1981 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
1982 &q->fl[1].desc, &q->fl[1].sdesc,
1983 &q->fl[1].desc_tag, &q->fl[1].desc_map,
1984 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
1985 printf("error %d from alloc ring fl1\n", ret);
1986 goto err;
1987 }
1988
1989 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
1990 &q->rspq.phys_addr, &q->rspq.desc, NULL,
1991 &q->rspq.desc_tag, &q->rspq.desc_map,
1992 NULL, NULL)) != 0) {
1993 printf("error %d from alloc ring rspq\n", ret);
1994 goto err;
1995 }
1996
1997 for (i = 0; i < ntxq; ++i) {
1998 /*
1999 * The control queue always uses immediate data so does not
2000 * need to keep track of any mbufs.
2001 * XXX Placeholder for future TOE support.
2002 */
2003 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2004
2005 if ((ret = alloc_ring(sc, p->txq_size[i],
2006 sizeof(struct tx_desc), sz,
2007 &q->txq[i].phys_addr, &q->txq[i].desc,
2008 &q->txq[i].sdesc, &q->txq[i].desc_tag,
2009 &q->txq[i].desc_map,
2010 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2011 printf("error %d from alloc ring tx %i\n", ret, i);
2012 goto err;
2013 }
2014 mbufq_init(&q->txq[i].sendq);
2015 q->txq[i].gen = 1;
2016 q->txq[i].size = p->txq_size[i];
2017 snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d",
2018 device_get_unit(sc->dev), irq_vec_idx, i);
2019 MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF);
2020 }
2021
2022 TASK_INIT(&q->txq[TXQ_OFLD].qresume_tsk, 0, restart_offloadq, q);
2023 TASK_INIT(&q->txq[TXQ_CTRL].qresume_tsk, 0, restart_ctrlq, q);
2024
2025 q->fl[0].gen = q->fl[1].gen = 1;
2026 q->fl[0].size = p->fl_size;
2027 q->fl[1].size = p->jumbo_size;
2028
2029 q->rspq.gen = 1;
2030 q->rspq.size = p->rspq_size;
2031
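	/*
	 * Stop the Ethernet Tx queue while fewer descriptors remain than
	 * the worst-case need of one maximally scattered packet per port.
	 */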
2032 q->txq[TXQ_ETH].stop_thres = nports *
2033 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2034
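	/*
	 * Free list 0 is backed by standard mbuf clusters (MCLBYTES), free
	 * list 1 by page-sized jumbo clusters (MJUMPAGESIZE) for larger
	 * frames.
	 */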
2035 q->fl[0].buf_size = MCLBYTES;
2036 q->fl[0].zone = zone_clust;
2037 q->fl[0].type = EXT_CLUSTER;
2038 q->fl[1].buf_size = MJUMPAGESIZE;
2039 q->fl[1].zone = zone_jumbop;
2040 q->fl[1].type = EXT_JUMBOP;
2041
2042 q->lro.enabled = lro_default;
2043
2044 mtx_lock(&sc->sge.reg_lock);
2045 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2046 q->rspq.phys_addr, q->rspq.size,
2047 q->fl[0].buf_size, 1, 0);
2048 if (ret) {
2049 printf("error %d from t3_sge_init_rspcntxt\n", ret);
2050 goto err_unlock;
2051 }
2052
2053 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2054 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2055 q->fl[i].phys_addr, q->fl[i].size,
2056 q->fl[i].buf_size, p->cong_thres, 1,
2057 0);
2058 if (ret) {
2059 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2060 goto err_unlock;
2061 }
2062 }
2063
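	/*
	 * Bind the Tx queues to HW egress contexts.  The offload and
	 * control contexts are only initialized when the qset actually has
	 * those queues (ntxq > 1 and ntxq > 2 respectively).
	 */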
2064 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2065 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2066 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2067 1, 0);
2068 if (ret) {
2069 printf("error %d from t3_sge_init_ecntxt\n", ret);
2070 goto err_unlock;
2071 }
2072
2073 if (ntxq > 1) {
2074 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2075 USE_GTS, SGE_CNTXT_OFLD, id,
2076 q->txq[TXQ_OFLD].phys_addr,
2077 q->txq[TXQ_OFLD].size, 0, 1, 0);
2078 if (ret) {
2079 printf("error %d from t3_sge_init_ecntxt\n", ret);
2080 goto err_unlock;
2081 }
2082 }
2083
2084 if (ntxq > 2) {
2085 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2086 SGE_CNTXT_CTRL, id,
2087 q->txq[TXQ_CTRL].phys_addr,
2088 q->txq[TXQ_CTRL].size,
2089 q->txq[TXQ_CTRL].token, 1, 0);
2090 if (ret) {
2091 printf("error %d from t3_sge_init_ecntxt\n", ret);
2092 goto err_unlock;
2093 }
2094 }
2095
2096 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2097 device_get_unit(sc->dev), irq_vec_idx);
2098 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2099
2100 mtx_unlock(&sc->sge.reg_lock);
2101 t3_update_qset_coalesce(q, p);
2102 q->port = pi;
2103
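	/*
	 * Prepopulate both free lists, give the response queue all but one
	 * of its credits, and program its interrupt holdoff timer via the
	 * GTS register.
	 */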
2104 refill_fl(sc, &q->fl[0], q->fl[0].size);
2105 refill_fl(sc, &q->fl[1], q->fl[1].size);
2106 refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2107
2108 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2109 V_NEWTIMER(q->rspq.holdoff_tmr));
2110
2111 return (0);
2112
2113err_unlock:
2114 mtx_unlock(&sc->sge.reg_lock);
2115err:
2116 t3_free_qset(sc, q);
2117
2118 return (ret);
2119}
2120
2121void
2122t3_rx_eth(struct port_info *pi, struct sge_rspq *rq, struct mbuf *m, int ethpad)
2123{
2124 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2125 struct ifnet *ifp = pi->ifp;
2126
2127 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
2128 if (&pi->adapter->port[cpl->iff] != pi)
2129 panic("bad port index %d m->m_data=%p\n", cpl->iff, mtod(m, uint8_t *));
2130
2131 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
2132 cpl->csum_valid && cpl->csum == 0xffff) {
		rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID |
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		m->m_pkthdr.csum_data = 0xffff;
2137 }
2138 /*
2139 * XXX need to add VLAN support for 6.x
2140 */
2141#ifdef VLAN_SUPPORTED
2142 if (__predict_false(cpl->vlan_valid)) {
2143 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2144 m->m_flags |= M_VLANTAG;
2145 }
2146#endif
2147
2148 m->m_pkthdr.rcvif = ifp;
2149 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
2150 m_explode(m);
	/*
	 * Strip the CPL header and alignment pad now that the buffer has
	 * been converted to an mbuf chain.
	 */
2154 m_adj(m, sizeof(*cpl) + ethpad);
2155
2156 (*ifp->if_input)(ifp, m);
2157}
2158
2159/**
2160 * get_packet - return the next ingress packet buffer from a free list
2161 * @adap: the adapter that received the packet
2162 * @drop_thres: # of remaining buffers before we start dropping packets
2163 * @qs: the qset that the SGE free list holding the packet belongs to
 *	@m: the mbuf to fill with the packet contents
2165 * @r: response descriptor
2166 *
2167 * Get the next packet from a free list and complete setup of the
 *	mbuf.  If the packet is small we make a copy and recycle the
 *	original buffer, otherwise we use the original buffer itself.  If a
 *	positive drop threshold is supplied, packets are dropped and their
2171 * buffers recycled if (a) the number of remaining buffers is under the
2172 * threshold and the packet is too big to copy, or (b) the packet should
2173 * be copied but there is no memory for the copy.
2174 */
2175static int
2176get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2177 struct mbuf *m, struct rsp_desc *r)
2178{
2179
2180 unsigned int len_cq = ntohl(r->len_cq);
2181 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2182 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2183 uint32_t len = G_RSPD_LEN(len_cq);
2184 uint32_t flags = ntohl(r->flags);
2185 uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2186 void *cl;
2187 int ret = 0;
2188
2189 prefetch(sd->cl);
2190
2191 fl->credits--;
2192 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2193
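	/*
	 * Small single-buffer packets are copied into the caller's mbuf so
	 * the free-list buffer can be recycled in place; larger packets
	 * hand the DMA-unmapped cluster to the mbuf instead (zero copy).
	 */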
2194 if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2195 cl = mtod(m, void *);
2196 memcpy(cl, sd->cl, len);
2197 recycle_rx_buf(adap, fl, fl->cidx);
2198 } else {
2199 cl = sd->cl;
2200 bus_dmamap_unload(fl->entry_tag, sd->map);
2201 }
	switch (sopeop) {
2203 case RSPQ_SOP_EOP:
2204 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2205 if (cl == sd->cl)
2206 m_cljset(m, cl, fl->type);
2207 m->m_len = m->m_pkthdr.len = len;
2208 ret = 1;
2209 goto done;
2211 case RSPQ_NSOP_NEOP:
2212 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2213 ret = 0;
2214 break;
2215 case RSPQ_SOP:
2216 DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2217 m_iovinit(m);
2218 ret = 0;
2219 break;
2220 case RSPQ_EOP:
2221 DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2222 ret = 1;
2223 break;
2224 }
2225 m_iovappend(m, cl, fl->buf_size, len, 0);
2226
2227done:
2228 if (++fl->cidx == fl->size)
2229 fl->cidx = 0;
2230
2231 return (ret);
2232}
2233
2234/**
2235 * handle_rsp_cntrl_info - handles control information in a response
2236 * @qs: the queue set corresponding to the response
2237 * @flags: the response control flags
2238 *
2239 * Handles the control information of an SGE response, such as GTS
2240 * indications and completion credits for the queue set's Tx queues.
 *	The HW coalesces credits, so we don't do any extra SW coalescing.
2242 */
2243static __inline void
2244handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2245{
2246 unsigned int credits;
2247
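	/*
	 * TXQ0, TXQ1, and TXQ2 credits correspond to the Ethernet, offload,
	 * and control Tx queues of this queue set respectively.
	 */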
2248#if USE_GTS
2249 if (flags & F_RSPD_TXQ0_GTS)
2250 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2251#endif
2252 credits = G_RSPD_TXQ0_CR(flags);
2253 if (credits) {
2254 qs->txq[TXQ_ETH].processed += credits;
2255 if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC)
2256 taskqueue_enqueue(qs->port->adapter->tq,
2257 &qs->port->timer_reclaim_task);
2258 }
2259
2260 credits = G_RSPD_TXQ2_CR(flags);
2261 if (credits)
2262 qs->txq[TXQ_CTRL].processed += credits;
2263
#if USE_GTS
	if (flags & F_RSPD_TXQ1_GTS)
		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
#endif
2268 credits = G_RSPD_TXQ1_CR(flags);
2269 if (credits)
2270 qs->txq[TXQ_OFLD].processed += credits;
2271}
2272
2273static void
2274check_ring_db(adapter_t *adap, struct sge_qset *qs,
2275 unsigned int sleeping)
2276{
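	/*
	 * XXX stub: doorbell handling for Tx queues that the HW put to
	 * sleep (GTS mode) is not implemented; 'sleeping' is ignored.
	 */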
2277 ;
2278}
2279
2280/**
2281 * process_responses - process responses from an SGE response queue
2282 * @adap: the adapter
2283 * @qs: the queue set to which the response queue belongs
2284 * @budget: how many responses can be processed in this round
2285 *
2286 * Process responses from an SGE response queue up to the supplied budget.
2287 * Responses include received packets as well as credits and other events
2288 * for the queues that belong to the response queue's queue set.
2289 * A negative budget is effectively unlimited.
2290 *
2291 * Additionally choose the interrupt holdoff time for the next interrupt
 *	on this queue.  If the system is under memory shortage, use a fairly
2293 * long delay to help recovery.
2294 */
2295static int
2296process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2297{
2298 struct sge_rspq *rspq = &qs->rspq;
2299 struct rsp_desc *r = &rspq->desc[rspq->cidx];
2300 int budget_left = budget;
2301 unsigned int sleeping = 0;
2302 int lro = qs->lro.enabled;
2303 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
2304 int ngathered = 0;
2305#ifdef DEBUG
2306 static int last_holdoff = 0;
2307 if (rspq->holdoff_tmr != last_holdoff) {
2308 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2309 last_holdoff = rspq->holdoff_tmr;
2310 }
2311#endif
2312 rspq->next_holdoff = rspq->holdoff_tmr;
2313
2314 while (__predict_true(budget_left && is_new_response(r, rspq))) {
2315 int eth, eop = 0, ethpad = 0;
2316 uint32_t flags = ntohl(r->flags);
2317 uint32_t rss_csum = *(const uint32_t *)r;
2318 uint32_t rss_hash = r->rss_hdr.rss_hash_val;
2319
2320 eth = (r->rss_hdr.opcode == CPL_RX_PKT);
2321
2322 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2323 /* XXX */
2324 printf("async notification\n");
2325
2326 } else if (flags & F_RSPD_IMM_DATA_VALID) {
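			/*
			 * The packet payload arrived as immediate data in
			 * the response descriptor itself, so it is copied
			 * into mbufs rather than taken from a free list.
			 */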
2327 struct mbuf *m = NULL;
2328 if (cxgb_debug)
2329 printf("IMM DATA VALID\n");
2330 if (rspq->m == NULL)
2331 rspq->m = m_gethdr(M_NOWAIT, MT_DATA);
2332 else
2333 m = m_gethdr(M_NOWAIT, MT_DATA);
2334
2335 if (rspq->m == NULL || m == NULL) {
2336 rspq->next_holdoff = NOMEM_INTR_DELAY;
2337 budget_left--;
2338 break;
2339 }
2340 get_imm_packet(adap, r, rspq->m, m);
2341 eop = 1;
2342 rspq->imm_data++;
2343 } else if (r->len_cq) {
2344 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
2345
2346 if (rspq->m == NULL)
2347 rspq->m = m_gethdr(M_NOWAIT, MT_DATA);
2348 if (rspq->m == NULL) {
2349 log(LOG_WARNING, "failed to get mbuf for packet\n");
2350 break;
2351 }
2352
2353 ethpad = 2;
2354 eop = get_packet(adap, drop_thresh, qs, rspq->m, r);
2355 } else {
2356 DPRINTF("pure response\n");
2357 rspq->pure_rsps++;
2358 }
2359
2360 if (flags & RSPD_CTRL_MASK) {
2361 sleeping |= flags & RSPD_GTS_MASK;
2362 handle_rsp_cntrl_info(qs, flags);
2363 }
2364
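		/*
		 * Advance to the next response descriptor, flipping the
		 * generation bit when the ring wraps so stale entries are
		 * not mistaken for new responses.
		 */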
2365 r++;
2366 if (__predict_false(++rspq->cidx == rspq->size)) {
2367 rspq->cidx = 0;
2368 rspq->gen ^= 1;
2369 r = rspq->desc;
2370 }
2371
2372 prefetch(r);
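		/*
		 * Return response queue credits to the HW in batches of a
		 * quarter of the ring rather than one at a time.
		 */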
2373 if (++rspq->credits >= (rspq->size / 4)) {
2374 refill_rspq(adap, rspq, rspq->credits);
2375 rspq->credits = 0;
2376 }
2377
2378 if (eop) {
2379 prefetch(mtod(rspq->m, uint8_t *));
2380 prefetch(mtod(rspq->m, uint8_t *) + L1_CACHE_BYTES);
2381
2382 if (eth) {
2383 t3_rx_eth_lro(adap, rspq, rspq->m, ethpad,
2384 rss_hash, rss_csum, lro);
2385
2386 rspq->m = NULL;
2387 } else {
2388 rspq->m->m_pkthdr.csum_data = rss_csum;
2389 /*
2390 * XXX size mismatch
2391 */
2392 m_set_priority(rspq->m, rss_hash);
2393
2394 ngathered = rx_offload(&adap->tdev, rspq, rspq->m,
2395 offload_mbufs, ngathered);
2396 }
2397#ifdef notyet
2398 taskqueue_enqueue(adap->tq, &adap->timer_reclaim_task);
2399#else
2400 __refill_fl(adap, &qs->fl[0]);
2401 __refill_fl(adap, &qs->fl[1]);
2402#endif
2403 }
2404 --budget_left;
2405 }
2406
2407 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
2408 t3_lro_flush(adap, qs, &qs->lro);
2409
2410 if (sleeping)
2411 check_ring_db(adap, qs, sleeping);
2412
2413 smp_mb(); /* commit Tx queue processed updates */
2414 if (__predict_false(qs->txq_stopped != 0))
2415 restart_tx(qs);
2416
2417 budget -= budget_left;
2418 return (budget);
2419}
2420
2421/*
2422 * A helper function that processes responses and issues GTS.
2423 */
2424static __inline int
2425process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
2426{
2427 int work;
2428 static int last_holdoff = 0;
2429
2430 work = process_responses(adap, rspq_to_qset(rq), -1);
2431
2432 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
2433 printf("next_holdoff=%d\n", rq->next_holdoff);
2434 last_holdoff = rq->next_holdoff;
2435 }
2436
2437 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2438 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
	return (work);
2440}
2441
2442
2443/*
2444 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2445 * Handles data events from SGE response queues as well as error and other
2446 * async events as they all use the same interrupt pin. We use one SGE
2447 * response queue per port in this mode and protect all response queues with
2448 * queue 0's lock.
2449 */
2450void
2451t3b_intr(void *data)
2452{
2453 uint32_t map;
2454 adapter_t *adap = data;
2455 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2456 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2457
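	/*
	 * Acknowledge the INTx interrupt and read the map of response
	 * queues with pending responses; bit 0 corresponds to qset 0 and
	 * bit 1 to qset 1.
	 */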
2458 t3_write_reg(adap, A_PL_CLI, 0);
2459 map = t3_read_reg(adap, A_SG_DATA_INTR);
2460
2461 if (!map)
2462 return;
2463
2464 if (__predict_false(map & F_ERRINTR))
2465 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
2466
2467 mtx_lock(&q0->lock);
2468
2469 if (__predict_true(map & 1))
2470 process_responses_gts(adap, q0);
2471
2472 if (map & 2)
2473 process_responses_gts(adap, q1);
2474
2475 mtx_unlock(&q0->lock);
2476}
2477
2478/*
2479 * The MSI interrupt handler. This needs to handle data events from SGE
2480 * response queues as well as error and other async events as they all use
2481 * the same MSI vector. We use one SGE response queue per port in this mode
2482 * and protect all response queues with queue 0's lock.
2483 */
2484void
2485t3_intr_msi(void *data)
2486{
2487 adapter_t *adap = data;
2488 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2489 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2490 int new_packets = 0;
2491
2492 mtx_lock(&q0->lock);
2493 if (process_responses_gts(adap, q0)) {
2494 new_packets = 1;
2495 }
2496
2497 if (adap->params.nports == 2 &&
2498 process_responses_gts(adap, q1)) {
2499 new_packets = 1;
2500 }
2501
2502 mtx_unlock(&q0->lock);
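	/*
	 * If neither response queue had work, the MSI was most likely
	 * raised for an error or other async event; defer to the slow
	 * interrupt task.
	 */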
2503 if (new_packets == 0)
2504 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
2505}
2506
2507void
2508t3_intr_msix(void *data)
2509{
2510 struct sge_qset *qs = data;
2511 adapter_t *adap = qs->port->adapter;
2512 struct sge_rspq *rspq = &qs->rspq;
2513
2514 mtx_lock(&rspq->lock);
2515 if (process_responses_gts(adap, rspq) == 0)
2516 rspq->unhandled_irqs++;
2517 mtx_unlock(&rspq->lock);
2518}
2519
/*
 * Sysctl handler to enable/disable LRO.  LRO is currently broken by recent
 * mbuf changes, so the handler returns immediately unless LRO_WORKING is
 * defined.
 */
2523static int
2524t3_lro_enable(SYSCTL_HANDLER_ARGS)
2525{
2526 adapter_t *sc;
2527 int i, j, enabled, err, nqsets = 0;
2528
2529#ifndef LRO_WORKING
2530 return (0);
2531#endif
2532
2533 sc = arg1;
2534 enabled = sc->sge.qs[0].lro.enabled;
2535 err = sysctl_handle_int(oidp, &enabled, arg2, req);
2536
2537 if (err != 0)
2538 return (err);
2539 if (enabled == sc->sge.qs[0].lro.enabled)
2540 return (0);
2541
2542 for (i = 0; i < sc->params.nports; i++)
2543 for (j = 0; j < sc->port[i].nqsets; j++)
2544 nqsets++;
2545
2546 for (i = 0; i < nqsets; i++)
2547 sc->sge.qs[i].lro.enabled = enabled;
2548
2549 return (0);
2550}
2551
2552static int
2553t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS)
2554{
2555 adapter_t *sc = arg1;
2556 struct qset_params *qsp = &sc->params.sge.qset[0];
2557 int coalesce_nsecs;
2558 struct sge_qset *qs;
2559 int i, j, err, nqsets = 0;
2560 struct mtx *lock;
2561
2562 coalesce_nsecs = qsp->coalesce_nsecs;
2563 err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req);
2564
2565 if (err != 0) {
2566 return (err);
2567 }
2568 if (coalesce_nsecs == qsp->coalesce_nsecs)
2569 return (0);
2570
2571 for (i = 0; i < sc->params.nports; i++)
2572 for (j = 0; j < sc->port[i].nqsets; j++)
2573 nqsets++;
2574
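	/* Enforce a 100ns floor on the coalescing timer. */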
2575 coalesce_nsecs = max(100, coalesce_nsecs);
2576
2577 for (i = 0; i < nqsets; i++) {
2578 qs = &sc->sge.qs[i];
2579 qsp = &sc->params.sge.qset[i];
2580 qsp->coalesce_nsecs = coalesce_nsecs;
2581
2582 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
2583 &sc->sge.qs[0].rspq.lock;
2584
2585 mtx_lock(lock);
2586 t3_update_qset_coalesce(qs, qsp);
2587 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2588 V_NEWTIMER(qs->rspq.holdoff_tmr));
2589 mtx_unlock(lock);
2590 }
2591
2592 return (0);
2593}
2594
2595
2596void
2597t3_add_sysctls(adapter_t *sc)
2598{
2599 struct sysctl_ctx_list *ctx;
2600 struct sysctl_oid_list *children;
2601
2602 ctx = device_get_sysctl_ctx(sc->dev);
2603 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
2604
2605 /* random information */
2606 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
2607 "firmware_version",
2608 CTLFLAG_RD, &sc->fw_version,
2609 0, "firmware version");
2610
2611 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
2612 "enable_lro",
2613 CTLTYPE_INT|CTLFLAG_RW, sc,
2614 0, t3_lro_enable,
2615 "I", "enable large receive offload");
2616
2617 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
2618 "intr_coal",
2619 CTLTYPE_INT|CTLFLAG_RW, sc,
2620 0, t3_set_coalesce_nsecs,
2621 "I", "interrupt coalescing timer (ns)");
2622 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
2623 "enable_debug",
2624 CTLFLAG_RW, &cxgb_debug,
2625 0, "enable verbose debugging output");
2626
2627 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
2628 "collapse_free",
2629 CTLFLAG_RD, &collapse_free,
2630 0, "frees during collapse");
2631 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
2632 "mb_free_vec_free",
2633 CTLFLAG_RD, &mb_free_vec_free,
2634 0, "frees during mb_free_vec");
2635 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
2636 "collapse_mbufs",
2637 CTLFLAG_RW, &collapse_mbufs,
2638 0, "collapse mbuf chains into iovecs");
2639}
2640
2641/**
2642 * t3_get_desc - dump an SGE descriptor for debugging purposes
2643 * @qs: the queue set
2644 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
2645 * @idx: the descriptor index in the queue
2646 * @data: where to dump the descriptor contents
2647 *
2648 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
2649 * size of the descriptor.
2650 */
2651int
2652t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
2653 unsigned char *data)
2654{
2655 if (qnum >= 6)
2656 return (EINVAL);
2657
2658 if (qnum < 3) {
2659 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
			return (EINVAL);
2661 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
2662 return sizeof(struct tx_desc);
2663 }
2664
2665 if (qnum == 3) {
2666 if (!qs->rspq.desc || idx >= qs->rspq.size)
2667 return (EINVAL);
2668 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
2669 return sizeof(struct rsp_desc);
2670 }
2671
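	/* Remaining indices 4 and 5 select the two free lists. */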
2672 qnum -= 4;
2673 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
2674 return (EINVAL);
2675 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
2676 return sizeof(struct rx_desc);
2677}