cxgb_sge.c: revision 170789 (old, deleted lines) vs. revision 170869 (new, added lines)
1/**************************************************************************
2
3Copyright (c) 2007, Chelsio Inc.
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
15
16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26POSSIBILITY OF SUCH DAMAGE.
27
28***************************************************************************/
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_sge.c 170789 2007-06-15 20:02:02Z kmacy $");
31__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_sge.c 170869 2007-06-17 04:33:38Z kmacy $");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/kernel.h>
36#include <sys/module.h>
37#include <sys/bus.h>
38#include <sys/conf.h>
39#include <machine/bus.h>
40#include <machine/resource.h>
41#include <sys/bus_dma.h>
42#include <sys/rman.h>
43#include <sys/queue.h>
44#include <sys/sysctl.h>
45#include <sys/taskqueue.h>
46
47
48#include <sys/proc.h>
49#include <sys/sched.h>
50#include <sys/smp.h>
51#include <sys/systm.h>
52
53#include <netinet/in_systm.h>
54#include <netinet/in.h>
55#include <netinet/ip.h>
56#include <netinet/tcp.h>
57
58#include <dev/pci/pcireg.h>
59#include <dev/pci/pcivar.h>
60
61#ifdef CONFIG_DEFINED
62#include <cxgb_include.h>
63#else
64#include <dev/cxgb/cxgb_include.h>
65#endif
66
67uint32_t collapse_free = 0;
68uint32_t mb_free_vec_free = 0;
69int collapse_mbufs = 0;
70static int recycle_enable = 1;
71
72
73/*
74 * XXX GC
75 */
76#define NET_XMIT_CN 2
77#define NET_XMIT_SUCCESS 0
78
79#define USE_GTS 0
80
81#define SGE_RX_SM_BUF_SIZE 1536
82#define SGE_RX_DROP_THRES 16
83#define SGE_RX_COPY_THRES 128
84
85/*
86 * Period of the Tx buffer reclaim timer. This timer does not need to run
87 * frequently as Tx buffers are usually reclaimed by new Tx packets.
88 */
89#define TX_RECLAIM_PERIOD (hz >> 1)
90
91/*
92 * work request size in bytes
93 */
94#define WR_LEN (WR_FLITS * 8)
95
96/*
97 * Values for sge_txq.flags
98 */
99enum {
100 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
101 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
102};
103
104struct tx_desc {
105 uint64_t flit[TX_DESC_FLITS];
106} __packed;
107
108struct rx_desc {
109 uint32_t addr_lo;
110 uint32_t len_gen;
111 uint32_t gen2;
112 uint32_t addr_hi;
113} __packed;
114
115struct rsp_desc { /* response queue descriptor */
116 struct rss_header rss_hdr;
117 uint32_t flags;
118 uint32_t len_cq;
119 uint8_t imm_data[47];
120 uint8_t intr_gen;
121} __packed;
122
123#define RX_SW_DESC_MAP_CREATED (1 << 0)
124#define TX_SW_DESC_MAP_CREATED (1 << 1)
125#define RX_SW_DESC_INUSE (1 << 3)
126#define TX_SW_DESC_MAPPED (1 << 4)
127
128#define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0)
129#define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP)
130#define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP)
131#define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
132
133struct tx_sw_desc { /* SW state per Tx descriptor */
134 struct mbuf *m;
135 bus_dmamap_t map;
136 int flags;
137};
138
139struct rx_sw_desc { /* SW state per Rx descriptor */
140 void *cl;
141 bus_dmamap_t map;
142 int flags;
143};
144
145struct txq_state {
146 unsigned int compl;
147 unsigned int gen;
148 unsigned int pidx;
149};
150
151struct refill_fl_cb_arg {
152 int error;
153 bus_dma_segment_t seg;
154 int nseg;
155};
156
157/*
158 * Maps a number of flits to the number of Tx descriptors that can hold them.
159 * The formula is
160 *
161 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
162 *
163 * HW allows up to 4 descriptors to be combined into a WR.
164 */
165static uint8_t flit_desc_map[] = {
166 0,
167#if SGE_NUM_GENBITS == 1
168 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
169 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
170 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
171 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
172#elif SGE_NUM_GENBITS == 2
173 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
174 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
175 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
176 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
177#else
178# error "SGE_NUM_GENBITS must be 1 or 2"
179#endif
180};
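A quick reading of the table, for orientation; these numbers simply restate the values already encoded above (and they agree with the formula if WR_FLITS is 16 with one generation bit and 15 with two, values not shown in this file):

	/*
	 * SGE_NUM_GENBITS == 1: 1-16 flits -> 1 desc, 17-31 -> 2, 32-46 -> 3, 47-61 -> 4
	 * SGE_NUM_GENBITS == 2: 1-15 flits -> 1 desc, 16-29 -> 2, 30-43 -> 3, 44-57 -> 4
	 */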
181
182
183static int lro_default = 0;
184int cxgb_debug = 0;
185
186static void t3_free_qset(adapter_t *sc, struct sge_qset *q);
187static void sge_timer_cb(void *arg);
188static void sge_timer_reclaim(void *arg, int ncount);
189static int free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec);
190
191/**
192 * reclaim_completed_tx - reclaims completed Tx descriptors
193 * @adapter: the adapter
194 * @q: the Tx queue to reclaim completed descriptors from
195 *
196 * Reclaims Tx descriptors that the SGE has indicated it has processed,
197 * and frees the associated buffers if possible. Called with the Tx
198 * queue's lock held.
199 */
200static __inline int
201reclaim_completed_tx(adapter_t *adapter, struct sge_txq *q, int nbufs, struct mbuf **mvec)
202{
203 int reclaimed, reclaim = desc_reclaimable(q);
204 int n = 0;
205
206 mtx_assert(&q->lock, MA_OWNED);
207 if (reclaim > 0) {
208 n = free_tx_desc(adapter, q, min(reclaim, nbufs), mvec);
209 reclaimed = min(reclaim, nbufs);
210 q->cleaned += reclaimed;
211 q->in_use -= reclaimed;
212 }
213 return (n);
214}
215
216/**
217 * should_restart_tx - are there enough resources to restart a Tx queue?
218 * @q: the Tx queue
219 *
220 * Checks if there are enough descriptors to restart a suspended Tx queue.
221 */
222static __inline int
223should_restart_tx(const struct sge_txq *q)
224{
225 unsigned int r = q->processed - q->cleaned;
226
227 return q->in_use - r < (q->size >> 1);
228}
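A worked example of the restart test, with invented numbers:

	/*
	 * q->size = 1024, q->in_use = 900, q->processed - q->cleaned = 450:
	 * 900 - 450 = 450 descriptors still genuinely outstanding, which is
	 * below half the ring (512), so the suspended queue may be restarted.
	 */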
229
230/**
231 * t3_sge_init - initialize SGE
232 * @adap: the adapter
233 * @p: the SGE parameters
234 *
235 * Performs SGE initialization needed every time after a chip reset.
 236 * We do not initialize any of the queue sets here; instead the driver
 237 * top-level must request those individually. We also do not enable DMA
 238 * here; that should be done after the queues have been set up.
239 */
240void
241t3_sge_init(adapter_t *adap, struct sge_params *p)
242{
243 u_int ctrl, ups;
244
245 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
246
247 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
248 F_CQCRDTCTRL |
249 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
250 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
251#if SGE_NUM_GENBITS == 1
252 ctrl |= F_EGRGENCTRL;
253#endif
254 if (adap->params.rev > 0) {
255 if (!(adap->flags & (USING_MSIX | USING_MSI)))
256 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
257 ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL;
258 }
259 t3_write_reg(adap, A_SG_CONTROL, ctrl);
260 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
261 V_LORCQDRBTHRSH(512));
262 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
263 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
264 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
265 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000);
266 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
267 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
268 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
269 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
270 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
271}
272
273
274/**
275 * sgl_len - calculates the size of an SGL of the given capacity
276 * @n: the number of SGL entries
277 *
278 * Calculates the number of flits needed for a scatter/gather list that
279 * can hold the given number of entries.
280 */
281static __inline unsigned int
282sgl_len(unsigned int n)
283{
284 return ((3 * n) / 2 + (n & 1));
285}
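This works out to 3 flits per pair of entries, matching the sg_ent layout used by make_sgl() below (two 32-bit lengths packed into one flit, plus one 64-bit address flit per entry); a trailing odd entry costs 2 flits. Sample values from the formula:

	/* sgl_len(1) = 2, sgl_len(2) = 3, sgl_len(3) = 5, sgl_len(4) = 6 flits */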
286
287/**
288 * get_imm_packet - return the next ingress packet buffer from a response
289 * @resp: the response descriptor containing the packet data
290 *
291 * Return a packet containing the immediate data of the given response.
292 */
293static __inline void
294get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl)
295{
296 int len;
297 uint32_t flags = ntohl(resp->flags);
298 uint8_t sopeop = G_RSPD_SOP_EOP(flags);
299
300 /*
301 * would be a firmware bug
302 */
303 if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP)
304 return;
305
306 len = G_RSPD_LEN(ntohl(resp->len_cq));
307 switch (sopeop) {
308 case RSPQ_SOP_EOP:
309 m->m_len = m->m_pkthdr.len = len;
310 memcpy(mtod(m, uint8_t *), resp->imm_data, len);
311 break;
312 case RSPQ_EOP:
313 memcpy(cl, resp->imm_data, len);
314 m_iovappend(m, cl, MSIZE, len, 0);
315 break;
316 }
317}
318
319
320static __inline u_int
321flits_to_desc(u_int n)
322{
323 return (flit_desc_map[n]);
324}
325
326void
327t3_sge_err_intr_handler(adapter_t *adapter)
328{
329 unsigned int v, status;
330
331
332 status = t3_read_reg(adapter, A_SG_INT_CAUSE);
333
334 if (status & F_RSPQCREDITOVERFOW)
335 CH_ALERT(adapter, "SGE response queue credit overflow\n");
336
337 if (status & F_RSPQDISABLED) {
338 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
339
340 CH_ALERT(adapter,
341 "packet delivered to disabled response queue (0x%x)\n",
342 (v >> S_RSPQ0DISABLED) & 0xff);
343 }
344
345 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
346 if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED))
347 t3_fatal_err(adapter);
348}
349
350void
351t3_sge_prep(adapter_t *adap, struct sge_params *p)
352{
353 int i;
354
355 /* XXX Does ETHER_ALIGN need to be accounted for here? */
356 p->max_pkt_size = MJUM16BYTES - sizeof(struct cpl_rx_data);
357
358 for (i = 0; i < SGE_QSETS; ++i) {
359 struct qset_params *q = p->qset + i;
360
361 q->polling = adap->params.rev > 0;
362
363 q->coalesce_nsecs = 5000;
364
365 q->rspq_size = RSPQ_Q_SIZE;
366 q->fl_size = FL_Q_SIZE;
367 q->jumbo_size = JUMBO_Q_SIZE;
368 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
369 q->txq_size[TXQ_OFLD] = 1024;
370 q->txq_size[TXQ_CTRL] = 256;
371 q->cong_thres = 0;
372 }
373}
374
375int
376t3_sge_alloc(adapter_t *sc)
377{
378
379 /* The parent tag. */
380 if (bus_dma_tag_create( NULL, /* parent */
381 1, 0, /* algnmnt, boundary */
382 BUS_SPACE_MAXADDR, /* lowaddr */
383 BUS_SPACE_MAXADDR, /* highaddr */
384 NULL, NULL, /* filter, filterarg */
385 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
386 BUS_SPACE_UNRESTRICTED, /* nsegments */
387 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
388 0, /* flags */
389 NULL, NULL, /* lock, lockarg */
390 &sc->parent_dmat)) {
391 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
392 return (ENOMEM);
393 }
394
395 /*
396 * DMA tag for normal sized RX frames
397 */
398 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
399 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
400 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
401 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
402 return (ENOMEM);
403 }
404
405 /*
406 * DMA tag for jumbo sized RX frames.
407 */
408 if (bus_dma_tag_create(sc->parent_dmat, MJUMPAGESIZE, 0, BUS_SPACE_MAXADDR,
409 BUS_SPACE_MAXADDR, NULL, NULL, MJUMPAGESIZE, 1, MJUMPAGESIZE,
410 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
411 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
412 return (ENOMEM);
413 }
414
415 /*
416 * DMA tag for TX frames.
417 */
418 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
419 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
420 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
421 NULL, NULL, &sc->tx_dmat)) {
422 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
423 return (ENOMEM);
424 }
425
426 return (0);
427}
428
429int
430t3_sge_free(struct adapter * sc)
431{
432
433 if (sc->tx_dmat != NULL)
434 bus_dma_tag_destroy(sc->tx_dmat);
435
436 if (sc->rx_jumbo_dmat != NULL)
437 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
438
439 if (sc->rx_dmat != NULL)
440 bus_dma_tag_destroy(sc->rx_dmat);
441
442 if (sc->parent_dmat != NULL)
443 bus_dma_tag_destroy(sc->parent_dmat);
444
445 return (0);
446}
447
448void
449t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
450{
451
452 qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U);
453 qs->rspq.polling = 0 /* p->polling */;
454}
455
456static void
457refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
458{
459 struct refill_fl_cb_arg *cb_arg = arg;
460
461 cb_arg->error = error;
462 cb_arg->seg = segs[0];
463 cb_arg->nseg = nseg;
464
465}
466
467/**
468 * refill_fl - refill an SGE free-buffer list
469 * @sc: the controller softc
470 * @q: the free-list to refill
471 * @n: the number of new buffers to allocate
472 *
473 * (Re)populate an SGE free-buffer list with up to @n new packet buffers.
474 * The caller must assure that @n does not exceed the queue's capacity.
475 */
476static void
477refill_fl(adapter_t *sc, struct sge_fl *q, int n)
478{
479 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
480 struct rx_desc *d = &q->desc[q->pidx];
481 struct refill_fl_cb_arg cb_arg;
482 void *cl;
483 int err;
484
485 cb_arg.error = 0;
486 while (n--) {
487 /*
488 * We only allocate a cluster, mbuf allocation happens after rx
489 */
490 if ((cl = m_cljget(NULL, M_DONTWAIT, q->buf_size)) == NULL) {
491 log(LOG_WARNING, "Failed to allocate cluster\n");
492 goto done;
493 }
494 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
495 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
496 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
497 uma_zfree(q->zone, cl);
498 goto done;
499 }
500 sd->flags |= RX_SW_DESC_MAP_CREATED;
501 }
502 err = bus_dmamap_load(q->entry_tag, sd->map, cl, q->buf_size,
503 refill_fl_cb, &cb_arg, 0);
504
505 if (err != 0 || cb_arg.error) {
506 log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
507 /*
508 * XXX free cluster
509 */
510 return;
511 }
512
513 sd->flags |= RX_SW_DESC_INUSE;
514 sd->cl = cl;
515 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
516 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
517 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
518 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
519
520 d++;
521 sd++;
522
523 if (++q->pidx == q->size) {
524 q->pidx = 0;
525 q->gen ^= 1;
526 sd = q->sdesc;
527 d = q->desc;
528 }
529 q->credits++;
530 }
531
532done:
533 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
534}
535
536
537/**
538 * free_rx_bufs - free the Rx buffers on an SGE free list
 539 * @sc: the controller softc
540 * @q: the SGE free list to clean up
541 *
542 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
543 * this queue should be stopped before calling this function.
544 */
545static void
546free_rx_bufs(adapter_t *sc, struct sge_fl *q)
547{
548 u_int cidx = q->cidx;
549
550 while (q->credits--) {
551 struct rx_sw_desc *d = &q->sdesc[cidx];
552
553 if (d->flags & RX_SW_DESC_INUSE) {
554 bus_dmamap_unload(q->entry_tag, d->map);
555 bus_dmamap_destroy(q->entry_tag, d->map);
556 uma_zfree(q->zone, d->cl);
557 }
558 d->cl = NULL;
559 if (++cidx == q->size)
560 cidx = 0;
561 }
562}
563
564static __inline void
565__refill_fl(adapter_t *adap, struct sge_fl *fl)
566{
567 refill_fl(adap, fl, min(16U, fl->size - fl->credits));
568}
569
570/**
571 * recycle_rx_buf - recycle a receive buffer
572 * @adapter: the adapter
573 * @q: the SGE free list
574 * @idx: index of buffer to recycle
575 *
576 * Recycles the specified buffer on the given free list by adding it at
577 * the next available slot on the list.
578 */
579static void
580recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
581{
582 struct rx_desc *from = &q->desc[idx];
583 struct rx_desc *to = &q->desc[q->pidx];
584
585 q->sdesc[q->pidx] = q->sdesc[idx];
586 to->addr_lo = from->addr_lo; // already big endian
587 to->addr_hi = from->addr_hi; // likewise
588 wmb();
589 to->len_gen = htobe32(V_FLD_GEN1(q->gen));
590 to->gen2 = htobe32(V_FLD_GEN2(q->gen));
591 q->credits++;
592
593 if (++q->pidx == q->size) {
594 q->pidx = 0;
595 q->gen ^= 1;
596 }
597 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
598}
599
600static void
601alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
602{
603 uint32_t *addr;
604
605 addr = arg;
606 *addr = segs[0].ds_addr;
607}
608
609static int
610alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
611 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
612 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
613{
614 size_t len = nelem * elem_size;
615 void *s = NULL;
616 void *p = NULL;
617 int err;
618
619 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
620 BUS_SPACE_MAXADDR_32BIT,
621 BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
622 len, 0, NULL, NULL, tag)) != 0) {
623 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
624 return (ENOMEM);
625 }
626
627 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
628 map)) != 0) {
629 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
630 return (ENOMEM);
631 }
632
633 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
634 bzero(p, len);
635 *(void **)desc = p;
636
637 if (sw_size) {
638 len = nelem * sw_size;
639 s = malloc(len, M_DEVBUF, M_WAITOK);
640 bzero(s, len);
641 *(void **)sdesc = s;
642 }
643 if (parent_entry_tag == NULL)
644 return (0);
645
646 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
647 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
648 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
649 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
650 NULL, NULL, entry_tag)) != 0) {
651 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
652 return (ENOMEM);
653 }
654 return (0);
655}
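A sketch of a typical caller, to show how the out-parameters pair up; the fl->phys_addr, fl->desc_tag, and fl->desc_map field names are assumptions for illustration and do not appear in this file. A free list passes a software-descriptor size and a per-entry parent tag, while a ring with no per-buffer DMA state (e.g. a response queue) would pass 0 and NULL instead:

	/* hypothetical call from queue-set setup code */
	err = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
	    sizeof(struct rx_sw_desc), &fl->phys_addr, &fl->desc, &fl->sdesc,
	    &fl->desc_tag, &fl->desc_map, sc->rx_dmat, &fl->entry_tag);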
656
657static void
658sge_slow_intr_handler(void *arg, int ncount)
659{
660 adapter_t *sc = arg;
661
662 t3_slow_intr_handler(sc);
663}
664
665static void
666sge_timer_cb(void *arg)
667{
668 adapter_t *sc = arg;
669 struct port_info *p;
670 struct sge_qset *qs;
671 struct sge_txq *txq;
672 int i, j;
673 int reclaim_eth, reclaim_ofl, refill_rx;
674
675 for (i = 0; i < sc->params.nports; i++)
676 for (j = 0; j < sc->port[i].nqsets; j++) {
677 qs = &sc->sge.qs[i + j];
678 txq = &qs->txq[0];
679 reclaim_eth = txq[TXQ_ETH].processed - txq[TXQ_ETH].cleaned;
680 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
681 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
682 (qs->fl[1].credits < qs->fl[1].size));
683 if (reclaim_eth || reclaim_ofl || refill_rx) {
684 p = &sc->port[i];
685 taskqueue_enqueue(p->tq, &p->timer_reclaim_task);
686 break;
687 }
688 }
689 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
689 if (sc->open_device_map != 0)
690 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
690}
691
692/*
693 * This is meant to be a catch-all function to keep sge state private
694 * to sge.c
695 *
696 */
697int
698t3_sge_init_adapter(adapter_t *sc)
699{
700 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
701 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
702 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
703 return (0);
704}
705
706int
707t3_sge_init_port(struct port_info *p)
708{
709 TASK_INIT(&p->timer_reclaim_task, 0, sge_timer_reclaim, p);
710 return (0);
711}
712
713void
714t3_sge_deinit_sw(adapter_t *sc)
715{
716 int i;
717
718 callout_drain(&sc->sge_timer_ch);
719 if (sc->tq)
720 taskqueue_drain(sc->tq, &sc->slow_intr_task);
721 for (i = 0; i < sc->params.nports; i++)
722 if (sc->port[i].tq != NULL)
723 taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task);
724}
725
726/**
727 * refill_rspq - replenish an SGE response queue
728 * @adapter: the adapter
729 * @q: the response queue to replenish
730 * @credits: how many new responses to make available
731 *
732 * Replenishes a response queue by making the supplied number of responses
733 * available to HW.
734 */
735static __inline void
736refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
737{
738
739 /* mbufs are allocated on demand when a rspq entry is processed. */
740 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
741 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
742}
743
744
745static void
746sge_timer_reclaim(void *arg, int ncount)
747{
748 struct port_info *p = arg;
749 int i, nqsets = p->nqsets;
750 adapter_t *sc = p->adapter;
751 struct sge_qset *qs;
752 struct sge_txq *txq;
753 struct mtx *lock;
754 struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
755 int n, reclaimable;
756
757 for (i = 0; i < nqsets; i++) {
758 qs = &sc->sge.qs[i];
759 txq = &qs->txq[TXQ_ETH];
760 reclaimable = desc_reclaimable(txq);
761 if (reclaimable > 0) {
762 mtx_lock(&txq->lock);
763 n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec);
764 mtx_unlock(&txq->lock);
765
766 for (i = 0; i < n; i++)
767 m_freem_vec(m_vec[i]);
768
769 if (p->ifp->if_drv_flags & IFF_DRV_OACTIVE &&
770 txq->size - txq->in_use >= TX_START_MAX_DESC) {
771 p->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
772 taskqueue_enqueue(p->tq, &p->start_task);
773 }
774 }
775
776 txq = &qs->txq[TXQ_OFLD];
777 reclaimable = desc_reclaimable(txq);
778 if (reclaimable > 0) {
779 mtx_lock(&txq->lock);
780 n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec);
781 mtx_unlock(&txq->lock);
782
783 for (i = 0; i < n; i++)
784 m_freem_vec(m_vec[i]);
785 }
786
787 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
788 &sc->sge.qs[0].rspq.lock;
789
790 if (mtx_trylock(lock)) {
791 /* XXX currently assume that we are *NOT* polling */
792 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
793
794 if (qs->fl[0].credits < qs->fl[0].size - 16)
795 __refill_fl(sc, &qs->fl[0]);
796 if (qs->fl[1].credits < qs->fl[1].size - 16)
797 __refill_fl(sc, &qs->fl[1]);
798
799 if (status & (1 << qs->rspq.cntxt_id)) {
800 if (qs->rspq.credits) {
801 refill_rspq(sc, &qs->rspq, 1);
802 qs->rspq.credits--;
803 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
804 1 << qs->rspq.cntxt_id);
805 }
806 }
807 mtx_unlock(lock);
808 }
809 }
810}
811
812/**
813 * init_qset_cntxt - initialize an SGE queue set context info
814 * @qs: the queue set
815 * @id: the queue set id
816 *
817 * Initializes the TIDs and context ids for the queues of a queue set.
818 */
819static void
820init_qset_cntxt(struct sge_qset *qs, u_int id)
821{
822
823 qs->rspq.cntxt_id = id;
824 qs->fl[0].cntxt_id = 2 * id;
825 qs->fl[1].cntxt_id = 2 * id + 1;
826 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
827 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
828 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
829 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
830 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
831}
832
833
834static void
835txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
836{
837 txq->in_use += ndesc;
838 /*
839 * XXX we don't handle stopping of queue
840 * presumably start handles this when we bump against the end
841 */
842 txqs->gen = txq->gen;
843 txq->unacked += ndesc;
844 txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3);
845 txq->unacked &= 7;
846 txqs->pidx = txq->pidx;
847 txq->pidx += ndesc;
848
849 if (txq->pidx >= txq->size) {
850 txq->pidx -= txq->size;
851 txq->gen ^= 1;
852 }
853
854}
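The unacked arithmetic requests a completion roughly once every eight descriptors: the running count accumulates ndesc, bit 3 (value 8) is shifted up to the WR completion-request bit position (assuming F_WR_COMPL is 1 << S_WR_COMPL), and the counter then wraps back into 0-7. For example:

	/*
	 * txq_prod(txq, 5, &txqs): unacked 0 -> 5, txqs->compl = 0
	 * txq_prod(txq, 4, &txqs): unacked 5 -> 9, bit 3 set, so txqs->compl
	 *     carries the completion-request flag and unacked wraps to 1
	 */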
855
856/**
857 * calc_tx_descs - calculate the number of Tx descriptors for a packet
858 * @m: the packet mbufs
859 * @nsegs: the number of segments
860 *
861 * Returns the number of Tx descriptors needed for the given Ethernet
862 * packet. Ethernet packets require addition of WR and CPL headers.
863 */
864static __inline unsigned int
865calc_tx_descs(const struct mbuf *m, int nsegs)
866{
867 unsigned int flits;
868
869 if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
870 return 1;
871
872 flits = sgl_len(nsegs) + 2;
873#ifdef TSO_SUPPORTED
874 if (m->m_pkthdr.csum_flags & (CSUM_TSO))
875 flits++;
876#endif
877 return flits_to_desc(flits);
878}
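Two sample cases, with segment counts invented for illustration: a packet whose data fits in WR_LEN - sizeof(struct cpl_tx_pkt) rides as immediate data in a single descriptor; a 3-segment packet needs sgl_len(3) + 2 = 7 flits, which flit_desc_map still folds into one descriptor; a 9-segment packet needs 14 + 2 = 16 flits, i.e. two descriptors when SGE_NUM_GENBITS == 2.

	/* nsegs = 3 -> 7 flits -> 1 desc; nsegs = 9 -> 16 flits -> 2 descs (GENBITS == 2) */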
879
880static unsigned int
881busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
882 struct tx_sw_desc *stx, bus_dma_segment_t *segs, int *nsegs)
883{
884 struct mbuf *m0;
885 int err, pktlen;
886
887 m0 = *m;
888 pktlen = m0->m_pkthdr.len;
889
890 err = bus_dmamap_load_mvec_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0);
891#ifdef DEBUG
892 if (err) {
893 int n = 0;
894 struct mbuf *mtmp = m0;
895 while(mtmp) {
896 n++;
897 mtmp = mtmp->m_next;
898 }
899 printf("map_mbufs: bus_dmamap_load_mbuf_sg failed with %d - pkthdr.len==%d nmbufs=%d\n",
900 err, m0->m_pkthdr.len, n);
901 }
902#endif
903 if (err == EFBIG) {
904 /* Too many segments, try to defrag */
905 m0 = m_defrag(m0, M_NOWAIT);
906 if (m0 == NULL) {
907 m_freem(*m);
908 *m = NULL;
909 return (ENOBUFS);
910 }
911 *m = m0;
912 err = bus_dmamap_load_mbuf_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0);
913 }
914
915 if (err == ENOMEM) {
916 return (err);
917 }
918
919 if (err) {
920 if (cxgb_debug)
921 printf("map failure err=%d pktlen=%d\n", err, pktlen);
922 m_freem_vec(m0);
923 *m = NULL;
924 return (err);
925 }
926
927 bus_dmamap_sync(txq->entry_tag, stx->map, BUS_DMASYNC_PREWRITE);
928 stx->flags |= TX_SW_DESC_MAPPED;
929
930 return (0);
931}
932
933/**
934 * make_sgl - populate a scatter/gather list for a packet
935 * @sgp: the SGL to populate
936 * @segs: the packet dma segments
937 * @nsegs: the number of segments
938 *
939 * Generates a scatter/gather list for the buffers that make up a packet
940 * and returns the SGL size in 8-byte words. The caller must size the SGL
941 * appropriately.
942 */
943static __inline void
944make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
945{
946 int i, idx;
947
948 for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) {
949 if (i && idx == 0)
950 ++sgp;
951
952 sgp->len[idx] = htobe32(segs[i].ds_len);
953 sgp->addr[idx] = htobe64(segs[i].ds_addr);
954 }
955
956 if (idx)
957 sgp->len[idx] = 0;
958}
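A concrete layout example (hypothetical 3-segment packet): the loop fills both slots of the first sg_ent, advances to the next one for the third segment, and the final test zeroes the unused length slot so the list is terminated:

	/*
	 * nsegs = 3:
	 *   sgp[0].len[0]/addr[0] <- segs[0], sgp[0].len[1]/addr[1] <- segs[1]
	 *   sgp[1].len[0]/addr[0] <- segs[2], sgp[1].len[1] = 0 (terminator)
	 */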
959
960/**
961 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
962 * @adap: the adapter
963 * @q: the Tx queue
964 *
 965 * Ring the doorbell if a Tx queue is asleep. There is a natural race,
 966 * where the HW is going to sleep just after we checked; however,
 967 * the interrupt handler will then detect the outstanding TX packet
968 * and ring the doorbell for us.
969 *
970 * When GTS is disabled we unconditionally ring the doorbell.
971 */
972static __inline void
973check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
974{
975#if USE_GTS
976 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
977 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
978 set_bit(TXQ_LAST_PKT_DB, &q->flags);
979#ifdef T3_TRACE
980 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
981 q->cntxt_id);
982#endif
983 t3_write_reg(adap, A_SG_KDOORBELL,
984 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
985 }
986#else
987 wmb(); /* write descriptors before telling HW */
988 t3_write_reg(adap, A_SG_KDOORBELL,
989 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
990#endif
991}
992
993static __inline void
994wr_gen2(struct tx_desc *d, unsigned int gen)
995{
996#if SGE_NUM_GENBITS == 2
997 d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
998#endif
999}
1000
1001
1002
1003/**
1004 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
1005 * @ndesc: number of Tx descriptors spanned by the SGL
1006 * @txd: first Tx descriptor to be written
1007 * @txqs: txq state (generation and producer index)
1008 * @txq: the SGE Tx queue
1009 * @sgl: the SGL
1010 * @flits: number of flits to the start of the SGL in the first descriptor
1011 * @sgl_flits: the SGL size in flits
1012 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
1013 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
1014 *
1015 * Write a work request header and an associated SGL. If the SGL is
1016 * small enough to fit into one Tx descriptor it has already been written
1017 * and we just need to write the WR header. Otherwise we distribute the
1018 * SGL across the number of descriptors it spans.
1019 */
1020
1021static void
1022write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1023 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1024 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1025{
1026
1027 struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1028 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
1029
1030 if (__predict_true(ndesc == 1)) {
1031 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1032 V_WR_SGLSFLT(flits)) | wr_hi;
1033 wmb();
1034 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1035 V_WR_GEN(txqs->gen)) | wr_lo;
1036 /* XXX gen? */
1037 wr_gen2(txd, txqs->gen);
1038 } else {
1039 unsigned int ogen = txqs->gen;
1040 const uint64_t *fp = (const uint64_t *)sgl;
1041 struct work_request_hdr *wp = wrp;
1042
1043 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1044 V_WR_SGLSFLT(flits)) | wr_hi;
1045
1046 while (sgl_flits) {
1047 unsigned int avail = WR_FLITS - flits;
1048
1049 if (avail > sgl_flits)
1050 avail = sgl_flits;
1051 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1052 sgl_flits -= avail;
1053 ndesc--;
1054 if (!sgl_flits)
1055 break;
1056
1057 fp += avail;
1058 txd++;
1059 txsd++;
1060 if (++txqs->pidx == txq->size) {
1061 txqs->pidx = 0;
1062 txqs->gen ^= 1;
1063 txd = txq->desc;
1064 txsd = txq->sdesc;
1065 }
1066
1067 /*
1068 * when the head of the mbuf chain
1069 * is freed all clusters will be freed
1070 * with it
1071 */
1072 txsd->m = NULL;
1073 wrp = (struct work_request_hdr *)txd;
1074 wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1075 V_WR_SGLSFLT(1)) | wr_hi;
1076 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1077 sgl_flits + 1)) |
1078 V_WR_GEN(txqs->gen)) | wr_lo;
1079 wr_gen2(txd, txqs->gen);
1080 flits = 1;
1081 }
1082 wrp->wr_hi |= htonl(F_WR_EOP);
1083 wmb();
1084 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1085 wr_gen2((struct tx_desc *)wp, ogen);
1086 }
1087}
1088
1089
1090/* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
1091#define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
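With FreeBSD's ETHER_HDR_LEN of 14 and ETHER_VLAN_ENCAP_LEN of 4 this comes to 58 bytes, i.e. a VLAN-tagged Ethernet header plus option-less IPv4 and TCP headers; the TSO path in t3_encap() below copies this many bytes to a stack buffer when the first mbuf is shorter than that.

	/* 14 + 4 + 20 + 20 = 58 bytes */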
1092
1093int
1094t3_encap(struct port_info *p, struct mbuf **m)
1095{
1096 adapter_t *sc;
1097 struct mbuf *m0;
1098 struct sge_qset *qs;
1099 struct sge_txq *txq;
1100 struct tx_sw_desc *stx;
1101 struct txq_state txqs;
1102 unsigned int nsegs, ndesc, flits, cntrl, mlen;
1103 int err, tso_info = 0;
1104
1105 struct work_request_hdr *wrp;
1106 struct tx_sw_desc *txsd;
1107 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
1108 bus_dma_segment_t segs[TX_MAX_SEGS];
1109 uint32_t wr_hi, wr_lo, sgl_flits;
1110
1111 struct tx_desc *txd;
1112 struct cpl_tx_pkt *cpl;
1113
1114 DPRINTF("t3_encap ");
1115 m0 = *m;
1116 sc = p->adapter;
1117 qs = &sc->sge.qs[p->first_qset];
1118 txq = &qs->txq[TXQ_ETH];
1119 stx = &txq->sdesc[txq->pidx];
1120 txd = &txq->desc[txq->pidx];
1121 cpl = (struct cpl_tx_pkt *)txd;
1122 mlen = m0->m_pkthdr.len;
1123 cpl->len = htonl(mlen | 0x80000000);
1124
1125 DPRINTF("mlen=%d\n", mlen);
1126 /*
1127 * XXX handle checksum, TSO, and VLAN here
1128 *
1129 */
1130 cntrl = V_TXPKT_INTF(p->port);
1131
1132 /*
1133 * XXX need to add VLAN support for 6.x
1134 */
1135#ifdef VLAN_SUPPORTED
1136 if (m0->m_flags & M_VLANTAG)
1137 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
1138 if (m0->m_pkthdr.csum_flags & (CSUM_TSO))
1139 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1140#endif
1141 if (tso_info) {
1142 int eth_type;
1143 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *) cpl;
1144 struct ip *ip;
1145 struct tcphdr *tcp;
1146 uint8_t *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack? */
1147
1148 txd->flit[2] = 0;
1149 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1150 hdr->cntrl = htonl(cntrl);
1151
1152 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
1153 pkthdr = &tmp[0];
1154 m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr);
1155 } else {
1156 pkthdr = mtod(m0, uint8_t *);
1157 }
1158
1159 if (__predict_false(m0->m_flags & M_VLANTAG)) {
1160 eth_type = CPL_ETH_II_VLAN;
1161 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
1162 ETHER_VLAN_ENCAP_LEN);
1163 } else {
1164 eth_type = CPL_ETH_II;
1165 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
1166 }
1167 tcp = (struct tcphdr *)((uint8_t *)ip +
1168 sizeof(*ip));
1169
1170 tso_info |= V_LSO_ETH_TYPE(eth_type) |
1171 V_LSO_IPHDR_WORDS(ip->ip_hl) |
1172 V_LSO_TCPHDR_WORDS(tcp->th_off);
1173 hdr->lso_info = htonl(tso_info);
1174 flits = 3;
1175 } else {
1176 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1177 cpl->cntrl = htonl(cntrl);
1178
1179 if (mlen <= WR_LEN - sizeof(*cpl)) {
1180 txq_prod(txq, 1, &txqs);
1181 txq->sdesc[txqs.pidx].m = m0;
1182 m_set_priority(m0, txqs.pidx);
1183
1184 if (m0->m_len == m0->m_pkthdr.len)
1185 memcpy(&txd->flit[2], mtod(m0, uint8_t *), mlen);
1186 else
1187 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1188
1189 flits = (mlen + 7) / 8 + 2;
1190 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1191 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1192 F_WR_SOP | F_WR_EOP | txqs.compl);
1193 wmb();
1194 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
1195 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1196
1197 wr_gen2(txd, txqs.gen);
1198 check_ring_tx_db(sc, txq);
1199 return (0);
1200 }
1201 flits = 2;
1202 }
1203
1204 wrp = (struct work_request_hdr *)txd;
1205
1206 if ((err = busdma_map_mbufs(m, txq, stx, segs, &nsegs)) != 0) {
1207 return (err);
1208 }
1209 m0 = *m;
1210 ndesc = calc_tx_descs(m0, nsegs);
1211
1212 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1213 make_sgl(sgp, segs, nsegs);
1214
1215 sgl_flits = sgl_len(nsegs);
1216
1217 DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
1218 txq_prod(txq, ndesc, &txqs);
1219 txsd = &txq->sdesc[txqs.pidx];
1220 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1221 wr_lo = htonl(V_WR_TID(txq->token));
1222 txsd->m = m0;
1223 m_set_priority(m0, txqs.pidx);
1224
1225 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
1226 check_ring_tx_db(p->adapter, txq);
1227
1228 return (0);
1229}
1230
1231
1232/**
1233 * write_imm - write a packet into a Tx descriptor as immediate data
1234 * @d: the Tx descriptor to write
1235 * @m: the packet
1236 * @len: the length of packet data to write as immediate data
1237 * @gen: the generation bit value to write
1238 *
1239 * Writes a packet as immediate data into a Tx descriptor. The packet
1240 * contains a work request at its beginning. We must write the packet
1241 * carefully so the SGE doesn't read accidentally before it's written in
1242 * its entirety.
1243 */
1244static __inline void
1245write_imm(struct tx_desc *d, struct mbuf *m,
1246 unsigned int len, unsigned int gen)
1247{
1248 struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
1249 struct work_request_hdr *to = (struct work_request_hdr *)d;
1250
1251 memcpy(&to[1], &from[1], len - sizeof(*from));
1252 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1253 V_WR_BCNTLFLT(len & 7));
1254 wmb();
1255 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1256 V_WR_LEN((len + 7) / 8));
1257 wr_gen2(d, gen);
1258 m_freem(m);
1259}
1260
1261/**
1262 * check_desc_avail - check descriptor availability on a send queue
1263 * @adap: the adapter
1264 * @q: the TX queue
1265 * @m: the packet needing the descriptors
1266 * @ndesc: the number of Tx descriptors needed
1267 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1268 *
1269 * Checks if the requested number of Tx descriptors is available on an
1270 * SGE send queue. If the queue is already suspended or not enough
1271 * descriptors are available the packet is queued for later transmission.
1272 * Must be called with the Tx queue locked.
1273 *
1274 * Returns 0 if enough descriptors are available, 1 if there aren't
1275 * enough descriptors and the packet has been queued, and 2 if the caller
1276 * needs to retry because there weren't enough descriptors at the
1277 * beginning of the call but some freed up in the mean time.
1278 */
1279static __inline int
1280check_desc_avail(adapter_t *adap, struct sge_txq *q,
1281 struct mbuf *m, unsigned int ndesc,
1282 unsigned int qid)
1283{
1284 /*
 1285 * XXX We currently only use this for checking the control queue;
 1286 * the control queue is only used for binding qsets, which happens
 1287 * at init time, so we are guaranteed enough descriptors
1288 */
1289 if (__predict_false(!mbufq_empty(&q->sendq))) {
1290addq_exit: mbufq_tail(&q->sendq, m);
1291 return 1;
1292 }
1293 if (__predict_false(q->size - q->in_use < ndesc)) {
1294
1295 struct sge_qset *qs = txq_to_qset(q, qid);
1296
1297 setbit(&qs->txq_stopped, qid);
1298 smp_mb();
1299
1300 if (should_restart_tx(q) &&
1301 test_and_clear_bit(qid, &qs->txq_stopped))
1302 return 2;
1303
1304 q->stops++;
1305 goto addq_exit;
1306 }
1307 return 0;
1308}
1309
1310
1311/**
1312 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1313 * @q: the SGE control Tx queue
1314 *
1315 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1316 * that send only immediate data (presently just the control queues) and
1317 * thus do not have any mbufs
1318 */
1319static __inline void
1320reclaim_completed_tx_imm(struct sge_txq *q)
1321{
1322 unsigned int reclaim = q->processed - q->cleaned;
1323
1324 mtx_assert(&q->lock, MA_OWNED);
1325
1326 q->in_use -= reclaim;
1327 q->cleaned += reclaim;
1328}
1329
1330static __inline int
1331immediate(const struct mbuf *m)
1332{
1333 return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ;
1334}
1335
1336/**
1337 * ctrl_xmit - send a packet through an SGE control Tx queue
1338 * @adap: the adapter
1339 * @q: the control queue
1340 * @m: the packet
1341 *
1342 * Send a packet through an SGE control Tx queue. Packets sent through
1343 * a control queue must fit entirely as immediate data in a single Tx
1344 * descriptor and have no page fragments.
1345 */
1346static int
1347ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1348{
1349 int ret;
1350 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1351
1352 if (__predict_false(!immediate(m))) {
1353 m_freem(m);
1354 return 0;
1355 }
1356
1357 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1358 wrp->wr_lo = htonl(V_WR_TID(q->token));
1359
1360 mtx_lock(&q->lock);
1361again: reclaim_completed_tx_imm(q);
1362
1363 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1364 if (__predict_false(ret)) {
1365 if (ret == 1) {
1366 mtx_unlock(&q->lock);
1367 return (-1);
1368 }
1369 goto again;
1370 }
1371
1372 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1373
1374 q->in_use++;
1375 if (++q->pidx >= q->size) {
1376 q->pidx = 0;
1377 q->gen ^= 1;
1378 }
1379 mtx_unlock(&q->lock);
1380 wmb();
1381 t3_write_reg(adap, A_SG_KDOORBELL,
1382 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1383 return (0);
1384}
1385
1386
1387/**
1388 * restart_ctrlq - restart a suspended control queue
1389 * @qs: the queue set cotaining the control queue
1390 *
1391 * Resumes transmission on a suspended Tx control queue.
1392 */
1393static void
1394restart_ctrlq(void *data, int npending)
1395{
1396 struct mbuf *m;
1397 struct sge_qset *qs = (struct sge_qset *)data;
1398 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1399 adapter_t *adap = qs->port->adapter;
1400
1401 mtx_lock(&q->lock);
1402again: reclaim_completed_tx_imm(q);
1403
1404 while (q->in_use < q->size &&
1405 (m = mbufq_dequeue(&q->sendq)) != NULL) {
1406
1407 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1408
1409 if (++q->pidx >= q->size) {
1410 q->pidx = 0;
1411 q->gen ^= 1;
1412 }
1413 q->in_use++;
1414 }
1415 if (!mbufq_empty(&q->sendq)) {
1416 setbit(&qs->txq_stopped, TXQ_CTRL);
1417 smp_mb();
1418
1419 if (should_restart_tx(q) &&
1420 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1421 goto again;
1422 q->stops++;
1423 }
1424 mtx_unlock(&q->lock);
1425 t3_write_reg(adap, A_SG_KDOORBELL,
1426 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1427}
1428
1429
1430/*
1431 * Send a management message through control queue 0
1432 */
1433int
1434t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1435{
1436 return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
1437}
1438
1439/**
1440 * free_qset - free the resources of an SGE queue set
1441 * @sc: the controller owning the queue set
1442 * @q: the queue set
1443 *
1444 * Release the HW and SW resources associated with an SGE queue set, such
1445 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
1446 * queue set must be quiesced prior to calling this.
1447 */
1448static void
1449t3_free_qset(adapter_t *sc, struct sge_qset *q)
1450{
1451 int i;
1452
1453 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
1454 if (q->fl[i].desc) {
1455 mtx_lock(&sc->sge.reg_lock);
1456 t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
1457 mtx_unlock(&sc->sge.reg_lock);
1458 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
1459 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
1460 q->fl[i].desc_map);
1461 bus_dma_tag_destroy(q->fl[i].desc_tag);
1462 bus_dma_tag_destroy(q->fl[i].entry_tag);
1463 }
1464 if (q->fl[i].sdesc) {
1465 free_rx_bufs(sc, &q->fl[i]);
1466 free(q->fl[i].sdesc, M_DEVBUF);
1467 }
1468 }
1469
691}
692
693/*
694 * This is meant to be a catch-all function to keep sge state private
695 * to sge.c
696 *
697 */
698int
699t3_sge_init_adapter(adapter_t *sc)
700{
701 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
702 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
703 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
704 return (0);
705}
706
707int
708t3_sge_init_port(struct port_info *p)
709{
710 TASK_INIT(&p->timer_reclaim_task, 0, sge_timer_reclaim, p);
711 return (0);
712}
713
714void
715t3_sge_deinit_sw(adapter_t *sc)
716{
717 int i;
718
719 callout_drain(&sc->sge_timer_ch);
720 if (sc->tq)
721 taskqueue_drain(sc->tq, &sc->slow_intr_task);
722 for (i = 0; i < sc->params.nports; i++)
723 if (sc->port[i].tq != NULL)
724 taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task);
725}
726
727/**
728 * refill_rspq - replenish an SGE response queue
729  *	@sc: the adapter
730 * @q: the response queue to replenish
731 * @credits: how many new responses to make available
732 *
733 * Replenishes a response queue by making the supplied number of responses
734 * available to HW.
735 */
736static __inline void
737refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
738{
739
740 /* mbufs are allocated on demand when a rspq entry is processed. */
741 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
742 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
743}
744
745
746static void
747sge_timer_reclaim(void *arg, int ncount)
748{
749 struct port_info *p = arg;
750 int i, nqsets = p->nqsets;
751 adapter_t *sc = p->adapter;
752 struct sge_qset *qs;
753 struct sge_txq *txq;
754 struct mtx *lock;
755 struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
756 int n, reclaimable;
757
758 for (i = 0; i < nqsets; i++) {
759 qs = &sc->sge.qs[i];
760 txq = &qs->txq[TXQ_ETH];
761 reclaimable = desc_reclaimable(txq);
762 if (reclaimable > 0) {
763 mtx_lock(&txq->lock);
764 n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec);
765 mtx_unlock(&txq->lock);
766
767 for (i = 0; i < n; i++)
768 m_freem_vec(m_vec[i]);
769
770 if (p->ifp->if_drv_flags & IFF_DRV_OACTIVE &&
771 txq->size - txq->in_use >= TX_START_MAX_DESC) {
772 p->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
773 taskqueue_enqueue(p->tq, &p->start_task);
774 }
775 }
776
777 txq = &qs->txq[TXQ_OFLD];
778 reclaimable = desc_reclaimable(txq);
779 if (reclaimable > 0) {
780 mtx_lock(&txq->lock);
781 n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec);
782 mtx_unlock(&txq->lock);
783
784 for (i = 0; i < n; i++)
785 m_freem_vec(m_vec[i]);
786 }
787
788 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
789 &sc->sge.qs[0].rspq.lock;
790
791 if (mtx_trylock(lock)) {
792 /* XXX currently assume that we are *NOT* polling */
793 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
794
795 if (qs->fl[0].credits < qs->fl[0].size - 16)
796 __refill_fl(sc, &qs->fl[0]);
797 if (qs->fl[1].credits < qs->fl[1].size - 16)
798 __refill_fl(sc, &qs->fl[1]);
799
800 if (status & (1 << qs->rspq.cntxt_id)) {
801 if (qs->rspq.credits) {
802 refill_rspq(sc, &qs->rspq, 1);
803 qs->rspq.credits--;
804 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
805 1 << qs->rspq.cntxt_id);
806 }
807 }
808 mtx_unlock(lock);
809 }
810 }
811}
812
813/**
814 * init_qset_cntxt - initialize an SGE queue set context info
815 * @qs: the queue set
816 * @id: the queue set id
817 *
818 * Initializes the TIDs and context ids for the queues of a queue set.
819 */
820static void
821init_qset_cntxt(struct sge_qset *qs, u_int id)
822{
823
824 qs->rspq.cntxt_id = id;
825 qs->fl[0].cntxt_id = 2 * id;
826 qs->fl[1].cntxt_id = 2 * id + 1;
827 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
828 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
829 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
830 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
831 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
832}
833
834
835static void
836txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
837{
838 txq->in_use += ndesc;
839 /*
840	 * XXX we don't handle stopping of the queue;
841	 * presumably start handles this when we bump against the end.
842 */
843 txqs->gen = txq->gen;
844 txq->unacked += ndesc;
845 txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3);
846 txq->unacked &= 7;
847 txqs->pidx = txq->pidx;
848 txq->pidx += ndesc;
849
850 if (txq->pidx >= txq->size) {
851 txq->pidx -= txq->size;
852 txq->gen ^= 1;
853 }
854
855}
856
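/*
 * Illustrative sketch (added for exposition; not part of cxgb_sge.c): a
 * stand-alone program showing the producer-index/generation arithmetic
 * performed by txq_prod() above.  The ring size and starting values are
 * hypothetical; the real code also shifts the (unacked & 8) bit into the
 * WR_COMPL position of the work request header.
 */
#include <stdio.h>

int
main(void)
{
	unsigned int size = 256, pidx = 250, gen = 1, unacked = 5;
	unsigned int ndesc = 10, request_compl;

	unacked += ndesc;			/* 15: bit 3 now set */
	request_compl = (unacked & 8) != 0;	/* ask HW for a completion */
	unacked &= 7;				/* keep only the low three bits */

	pidx += ndesc;				/* 260 */
	if (pidx >= size) {
		pidx -= size;			/* wrap to 4 */
		gen ^= 1;			/* flip the generation bit */
	}
	printf("pidx=%u gen=%u compl=%u unacked=%u\n",
	    pidx, gen, request_compl, unacked);
	return (0);
}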
857/**
858 * calc_tx_descs - calculate the number of Tx descriptors for a packet
859 * @m: the packet mbufs
860 * @nsegs: the number of segments
861 *
862 * Returns the number of Tx descriptors needed for the given Ethernet
863 * packet. Ethernet packets require addition of WR and CPL headers.
864 */
865static __inline unsigned int
866calc_tx_descs(const struct mbuf *m, int nsegs)
867{
868 unsigned int flits;
869
870 if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
871 return 1;
872
873 flits = sgl_len(nsegs) + 2;
874#ifdef TSO_SUPPORTED
875 if (m->m_pkthdr.csum_flags & (CSUM_TSO))
876 flits++;
877#endif
878 return flits_to_desc(flits);
879}
880
881static unsigned int
882busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
883 struct tx_sw_desc *stx, bus_dma_segment_t *segs, int *nsegs)
884{
885 struct mbuf *m0;
886 int err, pktlen;
887
888 m0 = *m;
889 pktlen = m0->m_pkthdr.len;
890
891 err = bus_dmamap_load_mvec_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0);
892#ifdef DEBUG
893 if (err) {
894 int n = 0;
895 struct mbuf *mtmp = m0;
896 while(mtmp) {
897 n++;
898 mtmp = mtmp->m_next;
899 }
900 printf("map_mbufs: bus_dmamap_load_mbuf_sg failed with %d - pkthdr.len==%d nmbufs=%d\n",
901 err, m0->m_pkthdr.len, n);
902 }
903#endif
904 if (err == EFBIG) {
905 /* Too many segments, try to defrag */
906 m0 = m_defrag(m0, M_NOWAIT);
907 if (m0 == NULL) {
908 m_freem(*m);
909 *m = NULL;
910 return (ENOBUFS);
911 }
912 *m = m0;
913 err = bus_dmamap_load_mbuf_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0);
914 }
915
916 if (err == ENOMEM) {
917 return (err);
918 }
919
920 if (err) {
921 if (cxgb_debug)
922 printf("map failure err=%d pktlen=%d\n", err, pktlen);
923 m_freem_vec(m0);
924 *m = NULL;
925 return (err);
926 }
927
928 bus_dmamap_sync(txq->entry_tag, stx->map, BUS_DMASYNC_PREWRITE);
929 stx->flags |= TX_SW_DESC_MAPPED;
930
931 return (0);
932}
933
934/**
935 * make_sgl - populate a scatter/gather list for a packet
936 * @sgp: the SGL to populate
937 * @segs: the packet dma segments
938 * @nsegs: the number of segments
939 *
940  *	Generates a scatter/gather list for the buffers that make up a packet;
941  *	the SGL size in 8-byte words is computed separately with sgl_len().
942  *	The caller must size the SGL appropriately.
943 */
944static __inline void
945make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
946{
947 int i, idx;
948
949 for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) {
950 if (i && idx == 0)
951 ++sgp;
952
953 sgp->len[idx] = htobe32(segs[i].ds_len);
954 sgp->addr[idx] = htobe64(segs[i].ds_addr);
955 }
956
957 if (idx)
958 sgp->len[idx] = 0;
959}
960
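/*
 * Illustrative sketch (added for exposition; not part of cxgb_sge.c): how
 * make_sgl() above packs two DMA segments into each SGL entry and zeroes the
 * unused length slot when the segment count is odd.  The entry layout mirrors
 * the len[2]/addr[2] fields used above; the addresses and lengths are
 * hypothetical, and the driver additionally stores each field in big-endian
 * byte order.
 */
#include <stdio.h>
#include <stdint.h>

struct sketch_sg_ent {
	uint32_t len[2];
	uint64_t addr[2];
};

int
main(void)
{
	uint64_t addrs[3] = { 0x1000, 0x2000, 0x3000 };
	uint32_t lens[3] = { 1500, 512, 64 };
	struct sketch_sg_ent sgl[2] = {{{ 0 }}};
	struct sketch_sg_ent *sgp = sgl;
	int i, idx, nsegs = 3;

	for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) {
		if (i && idx == 0)
			++sgp;			/* a new entry every two segments */
		sgp->len[idx] = lens[i];
		sgp->addr[idx] = addrs[i];
	}
	if (idx)
		sgp->len[idx] = 0;		/* odd nsegs: terminate last entry */

	for (i = 0; i < 2; i++)
		printf("entry %d: len0=%u addr0=%#llx len1=%u addr1=%#llx\n", i,
		    sgl[i].len[0], (unsigned long long)sgl[i].addr[0],
		    sgl[i].len[1], (unsigned long long)sgl[i].addr[1]);
	return (0);
}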
961/**
962 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
963 * @adap: the adapter
964 * @q: the Tx queue
965 *
966  *	Ring the doorbell if a Tx queue is asleep. There is a natural race
967  *	where the HW may go to sleep just after we check; in that case the
968  *	interrupt handler will detect the outstanding TX packet and ring the
969  *	doorbell for us.
970 *
971 * When GTS is disabled we unconditionally ring the doorbell.
972 */
973static __inline void
974check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
975{
976#if USE_GTS
977 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
978 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
979 set_bit(TXQ_LAST_PKT_DB, &q->flags);
980#ifdef T3_TRACE
981 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
982 q->cntxt_id);
983#endif
984 t3_write_reg(adap, A_SG_KDOORBELL,
985 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
986 }
987#else
988 wmb(); /* write descriptors before telling HW */
989 t3_write_reg(adap, A_SG_KDOORBELL,
990 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
991#endif
992}
993
994static __inline void
995wr_gen2(struct tx_desc *d, unsigned int gen)
996{
997#if SGE_NUM_GENBITS == 2
998 d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
999#endif
1000}
1001
1002
1003
1004/**
1005 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
1006 * @ndesc: number of Tx descriptors spanned by the SGL
1007 * @txd: first Tx descriptor to be written
1008 * @txqs: txq state (generation and producer index)
1009 * @txq: the SGE Tx queue
1010 * @sgl: the SGL
1011 * @flits: number of flits to the start of the SGL in the first descriptor
1012 * @sgl_flits: the SGL size in flits
1013 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
1014 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
1015 *
1016 * Write a work request header and an associated SGL. If the SGL is
1017 * small enough to fit into one Tx descriptor it has already been written
1018 * and we just need to write the WR header. Otherwise we distribute the
1019 * SGL across the number of descriptors it spans.
1020 */
1021
1022static void
1023write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1024 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1025 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1026{
1027
1028 struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1029 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
1030
1031 if (__predict_true(ndesc == 1)) {
1032 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1033 V_WR_SGLSFLT(flits)) | wr_hi;
1034 wmb();
1035 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1036 V_WR_GEN(txqs->gen)) | wr_lo;
1037 /* XXX gen? */
1038 wr_gen2(txd, txqs->gen);
1039 } else {
1040 unsigned int ogen = txqs->gen;
1041 const uint64_t *fp = (const uint64_t *)sgl;
1042 struct work_request_hdr *wp = wrp;
1043
1044 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1045 V_WR_SGLSFLT(flits)) | wr_hi;
1046
1047 while (sgl_flits) {
1048 unsigned int avail = WR_FLITS - flits;
1049
1050 if (avail > sgl_flits)
1051 avail = sgl_flits;
1052 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1053 sgl_flits -= avail;
1054 ndesc--;
1055 if (!sgl_flits)
1056 break;
1057
1058 fp += avail;
1059 txd++;
1060 txsd++;
1061 if (++txqs->pidx == txq->size) {
1062 txqs->pidx = 0;
1063 txqs->gen ^= 1;
1064 txd = txq->desc;
1065 txsd = txq->sdesc;
1066 }
1067
1068 /*
1069 * when the head of the mbuf chain
1070 * is freed all clusters will be freed
1071 * with it
1072 */
1073 txsd->m = NULL;
1074 wrp = (struct work_request_hdr *)txd;
1075 wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1076 V_WR_SGLSFLT(1)) | wr_hi;
1077 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1078 sgl_flits + 1)) |
1079 V_WR_GEN(txqs->gen)) | wr_lo;
1080 wr_gen2(txd, txqs->gen);
1081 flits = 1;
1082 }
1083 wrp->wr_hi |= htonl(F_WR_EOP);
1084 wmb();
1085 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1086 wr_gen2((struct tx_desc *)wp, ogen);
1087 }
1088}
1089
1090
1091/* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
1092#define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
1093
1094int
1095t3_encap(struct port_info *p, struct mbuf **m)
1096{
1097 adapter_t *sc;
1098 struct mbuf *m0;
1099 struct sge_qset *qs;
1100 struct sge_txq *txq;
1101 struct tx_sw_desc *stx;
1102 struct txq_state txqs;
1103 unsigned int nsegs, ndesc, flits, cntrl, mlen;
1104 int err, tso_info = 0;
1105
1106 struct work_request_hdr *wrp;
1107 struct tx_sw_desc *txsd;
1108 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
1109 bus_dma_segment_t segs[TX_MAX_SEGS];
1110 uint32_t wr_hi, wr_lo, sgl_flits;
1111
1112 struct tx_desc *txd;
1113 struct cpl_tx_pkt *cpl;
1114
1115 DPRINTF("t3_encap ");
1116 m0 = *m;
1117 sc = p->adapter;
1118 qs = &sc->sge.qs[p->first_qset];
1119 txq = &qs->txq[TXQ_ETH];
1120 stx = &txq->sdesc[txq->pidx];
1121 txd = &txq->desc[txq->pidx];
1122 cpl = (struct cpl_tx_pkt *)txd;
1123 mlen = m0->m_pkthdr.len;
1124 cpl->len = htonl(mlen | 0x80000000);
1125
1126 DPRINTF("mlen=%d\n", mlen);
1127 /*
1128 * XXX handle checksum, TSO, and VLAN here
1129 *
1130 */
1131 cntrl = V_TXPKT_INTF(p->port);
1132
1133 /*
1134 * XXX need to add VLAN support for 6.x
1135 */
1136#ifdef VLAN_SUPPORTED
1137 if (m0->m_flags & M_VLANTAG)
1138 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
1139 if (m0->m_pkthdr.csum_flags & (CSUM_TSO))
1140 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1141#endif
1142 if (tso_info) {
1143 int eth_type;
1144 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *) cpl;
1145 struct ip *ip;
1146 struct tcphdr *tcp;
1147 uint8_t *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack? */
1148
1149 txd->flit[2] = 0;
1150 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1151 hdr->cntrl = htonl(cntrl);
1152
1153 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
1154 pkthdr = &tmp[0];
1155 m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr);
1156 } else {
1157 pkthdr = mtod(m0, uint8_t *);
1158 }
1159
1160 if (__predict_false(m0->m_flags & M_VLANTAG)) {
1161 eth_type = CPL_ETH_II_VLAN;
1162 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
1163 ETHER_VLAN_ENCAP_LEN);
1164 } else {
1165 eth_type = CPL_ETH_II;
1166 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
1167 }
1168 tcp = (struct tcphdr *)((uint8_t *)ip +
1169 sizeof(*ip));
1170
1171 tso_info |= V_LSO_ETH_TYPE(eth_type) |
1172 V_LSO_IPHDR_WORDS(ip->ip_hl) |
1173 V_LSO_TCPHDR_WORDS(tcp->th_off);
1174 hdr->lso_info = htonl(tso_info);
1175 flits = 3;
1176 } else {
1177 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1178 cpl->cntrl = htonl(cntrl);
1179
1180 if (mlen <= WR_LEN - sizeof(*cpl)) {
1181 txq_prod(txq, 1, &txqs);
1182 txq->sdesc[txqs.pidx].m = m0;
1183 m_set_priority(m0, txqs.pidx);
1184
1185 if (m0->m_len == m0->m_pkthdr.len)
1186 memcpy(&txd->flit[2], mtod(m0, uint8_t *), mlen);
1187 else
1188 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1189
1190 flits = (mlen + 7) / 8 + 2;
1191 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1192 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1193 F_WR_SOP | F_WR_EOP | txqs.compl);
1194 wmb();
1195 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
1196 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1197
1198 wr_gen2(txd, txqs.gen);
1199 check_ring_tx_db(sc, txq);
1200 return (0);
1201 }
1202 flits = 2;
1203 }
1204
1205 wrp = (struct work_request_hdr *)txd;
1206
1207 if ((err = busdma_map_mbufs(m, txq, stx, segs, &nsegs)) != 0) {
1208 return (err);
1209 }
1210 m0 = *m;
1211 ndesc = calc_tx_descs(m0, nsegs);
1212
1213 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1214 make_sgl(sgp, segs, nsegs);
1215
1216 sgl_flits = sgl_len(nsegs);
1217
1218 DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
1219 txq_prod(txq, ndesc, &txqs);
1220 txsd = &txq->sdesc[txqs.pidx];
1221 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1222 wr_lo = htonl(V_WR_TID(txq->token));
1223 txsd->m = m0;
1224 m_set_priority(m0, txqs.pidx);
1225
1226 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
1227 check_ring_tx_db(p->adapter, txq);
1228
1229 return (0);
1230}
1231
1232
1233/**
1234 * write_imm - write a packet into a Tx descriptor as immediate data
1235 * @d: the Tx descriptor to write
1236 * @m: the packet
1237 * @len: the length of packet data to write as immediate data
1238 * @gen: the generation bit value to write
1239 *
1240 * Writes a packet as immediate data into a Tx descriptor. The packet
1241 * contains a work request at its beginning. We must write the packet
1242 *	carefully so the SGE doesn't accidentally read it before it has been
1243 *	written in its entirety.
1244 */
1245static __inline void
1246write_imm(struct tx_desc *d, struct mbuf *m,
1247 unsigned int len, unsigned int gen)
1248{
1249 struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
1250 struct work_request_hdr *to = (struct work_request_hdr *)d;
1251
1252 memcpy(&to[1], &from[1], len - sizeof(*from));
1253 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1254 V_WR_BCNTLFLT(len & 7));
1255 wmb();
1256 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1257 V_WR_LEN((len + 7) / 8));
1258 wr_gen2(d, gen);
1259 m_freem(m);
1260}
1261
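/*
 * Illustrative sketch (added for exposition; not part of cxgb_sge.c): the two
 * length values computed by write_imm() above for a hypothetical
 * immediate-data length.  The work request length is expressed in whole
 * 8-byte flits, rounded up, and (len & 7) is the residual byte count that the
 * code places in V_WR_BCNTLFLT.
 */
#include <stdio.h>

int
main(void)
{
	unsigned int len = 43;			/* hypothetical WR length in bytes */
	unsigned int flits = (len + 7) / 8;	/* 6 flits, the last one partial */
	unsigned int bcntlflt = len & 7;	/* 3 residual bytes */

	printf("len=%u flits=%u bcntlflt=%u\n", len, flits, bcntlflt);
	return (0);
}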
1262/**
1263 * check_desc_avail - check descriptor availability on a send queue
1264 * @adap: the adapter
1265 * @q: the TX queue
1266 * @m: the packet needing the descriptors
1267 * @ndesc: the number of Tx descriptors needed
1268 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1269 *
1270 * Checks if the requested number of Tx descriptors is available on an
1271 * SGE send queue. If the queue is already suspended or not enough
1272 * descriptors are available the packet is queued for later transmission.
1273 * Must be called with the Tx queue locked.
1274 *
1275 * Returns 0 if enough descriptors are available, 1 if there aren't
1276 * enough descriptors and the packet has been queued, and 2 if the caller
1277 * needs to retry because there weren't enough descriptors at the
1278 *	beginning of the call but some freed up in the meantime.
1279 */
1280static __inline int
1281check_desc_avail(adapter_t *adap, struct sge_txq *q,
1282 struct mbuf *m, unsigned int ndesc,
1283 unsigned int qid)
1284{
1285 /*
1286 * XXX We currently only use this for checking the control queue
1287 * the control queue is only used for binding qsets which happens
1288 * at init time so we are guaranteed enough descriptors
1289 */
1290 if (__predict_false(!mbufq_empty(&q->sendq))) {
1291addq_exit: mbufq_tail(&q->sendq, m);
1292 return 1;
1293 }
1294 if (__predict_false(q->size - q->in_use < ndesc)) {
1295
1296 struct sge_qset *qs = txq_to_qset(q, qid);
1297
1298 setbit(&qs->txq_stopped, qid);
1299 smp_mb();
1300
1301 if (should_restart_tx(q) &&
1302 test_and_clear_bit(qid, &qs->txq_stopped))
1303 return 2;
1304
1305 q->stops++;
1306 goto addq_exit;
1307 }
1308 return 0;
1309}
1310
1311
1312/**
1313 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1314 * @q: the SGE control Tx queue
1315 *
1316 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1317 * that send only immediate data (presently just the control queues) and
1318 * thus do not have any mbufs
1319 */
1320static __inline void
1321reclaim_completed_tx_imm(struct sge_txq *q)
1322{
1323 unsigned int reclaim = q->processed - q->cleaned;
1324
1325 mtx_assert(&q->lock, MA_OWNED);
1326
1327 q->in_use -= reclaim;
1328 q->cleaned += reclaim;
1329}
1330
1331static __inline int
1332immediate(const struct mbuf *m)
1333{
1334 return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ;
1335}
1336
1337/**
1338 * ctrl_xmit - send a packet through an SGE control Tx queue
1339 * @adap: the adapter
1340 * @q: the control queue
1341 * @m: the packet
1342 *
1343 * Send a packet through an SGE control Tx queue. Packets sent through
1344 * a control queue must fit entirely as immediate data in a single Tx
1345 * descriptor and have no page fragments.
1346 */
1347static int
1348ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1349{
1350 int ret;
1351 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1352
1353 if (__predict_false(!immediate(m))) {
1354 m_freem(m);
1355 return 0;
1356 }
1357
1358 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1359 wrp->wr_lo = htonl(V_WR_TID(q->token));
1360
1361 mtx_lock(&q->lock);
1362again: reclaim_completed_tx_imm(q);
1363
1364 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1365 if (__predict_false(ret)) {
1366 if (ret == 1) {
1367 mtx_unlock(&q->lock);
1368 return (-1);
1369 }
1370 goto again;
1371 }
1372
1373 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1374
1375 q->in_use++;
1376 if (++q->pidx >= q->size) {
1377 q->pidx = 0;
1378 q->gen ^= 1;
1379 }
1380 mtx_unlock(&q->lock);
1381 wmb();
1382 t3_write_reg(adap, A_SG_KDOORBELL,
1383 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1384 return (0);
1385}
1386
1387
1388/**
1389 * restart_ctrlq - restart a suspended control queue
1390 *	@qs: the queue set containing the control queue
1391 *
1392 * Resumes transmission on a suspended Tx control queue.
1393 */
1394static void
1395restart_ctrlq(void *data, int npending)
1396{
1397 struct mbuf *m;
1398 struct sge_qset *qs = (struct sge_qset *)data;
1399 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1400 adapter_t *adap = qs->port->adapter;
1401
1402 mtx_lock(&q->lock);
1403again: reclaim_completed_tx_imm(q);
1404
1405 while (q->in_use < q->size &&
1406 (m = mbufq_dequeue(&q->sendq)) != NULL) {
1407
1408 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1409
1410 if (++q->pidx >= q->size) {
1411 q->pidx = 0;
1412 q->gen ^= 1;
1413 }
1414 q->in_use++;
1415 }
1416 if (!mbufq_empty(&q->sendq)) {
1417 setbit(&qs->txq_stopped, TXQ_CTRL);
1418 smp_mb();
1419
1420 if (should_restart_tx(q) &&
1421 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1422 goto again;
1423 q->stops++;
1424 }
1425 mtx_unlock(&q->lock);
1426 t3_write_reg(adap, A_SG_KDOORBELL,
1427 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1428}
1429
1430
1431/*
1432 * Send a management message through control queue 0
1433 */
1434int
1435t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1436{
1437 return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
1438}
1439
1440/**
1441 *	t3_free_qset - free the resources of an SGE queue set
1442 * @sc: the controller owning the queue set
1443 * @q: the queue set
1444 *
1445 * Release the HW and SW resources associated with an SGE queue set, such
1446 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
1447 * queue set must be quiesced prior to calling this.
1448 */
1449static void
1450t3_free_qset(adapter_t *sc, struct sge_qset *q)
1451{
1452 int i;
1453
1454 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
1455 if (q->fl[i].desc) {
1456 mtx_lock(&sc->sge.reg_lock);
1457 t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
1458 mtx_unlock(&sc->sge.reg_lock);
1459 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
1460 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
1461 q->fl[i].desc_map);
1462 bus_dma_tag_destroy(q->fl[i].desc_tag);
1463 bus_dma_tag_destroy(q->fl[i].entry_tag);
1464 }
1465 if (q->fl[i].sdesc) {
1466 free_rx_bufs(sc, &q->fl[i]);
1467 free(q->fl[i].sdesc, M_DEVBUF);
1468 }
1469 }
1470
1470 for (i = 0; i < SGE_TXQ_PER_SET; ++i) {
1471 for (i = 0; i < SGE_TXQ_PER_SET; i++) {
1471 if (q->txq[i].desc) {
1472 mtx_lock(&sc->sge.reg_lock);
1473 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
1474 mtx_unlock(&sc->sge.reg_lock);
1475 bus_dmamap_unload(q->txq[i].desc_tag,
1476 q->txq[i].desc_map);
1477 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
1478 q->txq[i].desc_map);
1479 bus_dma_tag_destroy(q->txq[i].desc_tag);
1480 bus_dma_tag_destroy(q->txq[i].entry_tag);
1472 if (q->txq[i].desc) {
1473 mtx_lock(&sc->sge.reg_lock);
1474 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
1475 mtx_unlock(&sc->sge.reg_lock);
1476 bus_dmamap_unload(q->txq[i].desc_tag,
1477 q->txq[i].desc_map);
1478 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
1479 q->txq[i].desc_map);
1480 bus_dma_tag_destroy(q->txq[i].desc_tag);
1481 bus_dma_tag_destroy(q->txq[i].entry_tag);
1482 MTX_DESTROY(&q->txq[i].lock);
1481 }
1482 if (q->txq[i].sdesc) {
1483 free(q->txq[i].sdesc, M_DEVBUF);
1484 }
1483 }
1484 if (q->txq[i].sdesc) {
1485 free(q->txq[i].sdesc, M_DEVBUF);
1486 }
1485 if (mtx_initialized(&q->txq[i].lock)) {
1486 mtx_destroy(&q->txq[i].lock);
1487 }
1488 }
1489
1490 if (q->rspq.desc) {
1491 mtx_lock(&sc->sge.reg_lock);
1492 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
1493 mtx_unlock(&sc->sge.reg_lock);
1494
1495 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
1496 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
1497 q->rspq.desc_map);
1498 bus_dma_tag_destroy(q->rspq.desc_tag);
1487 }
1488
1489 if (q->rspq.desc) {
1490 mtx_lock(&sc->sge.reg_lock);
1491 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
1492 mtx_unlock(&sc->sge.reg_lock);
1493
1494 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
1495 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
1496 q->rspq.desc_map);
1497 bus_dma_tag_destroy(q->rspq.desc_tag);
1498 MTX_DESTROY(&q->rspq.lock);
1499 }
1500
1499 }
1500
1501 if (mtx_initialized(&q->rspq.lock))
1502 mtx_destroy(&q->rspq.lock);
1503
1504 bzero(q, sizeof(*q));
1505}
1506
1507/**
1508 * t3_free_sge_resources - free SGE resources
1509 * @sc: the adapter softc
1510 *
1511 * Frees resources used by the SGE queue sets.
1512 */
1513void
1514t3_free_sge_resources(adapter_t *sc)
1515{
1501 bzero(q, sizeof(*q));
1502}
1503
1504/**
1505 * t3_free_sge_resources - free SGE resources
1506 * @sc: the adapter softc
1507 *
1508 * Frees resources used by the SGE queue sets.
1509 */
1510void
1511t3_free_sge_resources(adapter_t *sc)
1512{
1516 int i;
1513 int i, nqsets;
1517
1514
1518 for (i = 0; i < SGE_QSETS; ++i)
1515 for (nqsets = i = 0; i < (sc)->params.nports; i++)
1516 nqsets += sc->port[i].nqsets;
1517
1518 for (i = 0; i < nqsets; ++i)
1519 t3_free_qset(sc, &sc->sge.qs[i]);
1520}
1521
1522/**
1523 * t3_sge_start - enable SGE
1524 * @sc: the controller softc
1525 *
1526 * Enables the SGE for DMAs. This is the last step in starting packet
1527 * transfers.
1528 */
1529void
1530t3_sge_start(adapter_t *sc)
1531{
1532 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
1533}
1534
1535/**
1536 * t3_sge_stop - disable SGE operation
1537 * @sc: the adapter
1538 *
1539 *	Disables the DMA engine. This can be called in emergencies (e.g.,
1540 * from error interrupts) or from normal process context. In the latter
1541 * case it also disables any pending queue restart tasklets. Note that
1542 * if it is called in interrupt context it cannot disable the restart
1543 *	tasklets as it cannot wait; however, the tasklets will have no effect
1544 * since the doorbells are disabled and the driver will call this again
1545 * later from process context, at which time the tasklets will be stopped
1546 * if they are still running.
1547 */
1548void
1549t3_sge_stop(adapter_t *sc)
1550{
1551 int i;
1551 int i, nqsets;
1552
1552 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
1553
1554 if (sc->tq == NULL)
1555 return;
1556
1557 for (i = 0; i < SGE_QSETS; ++i) {
1558 for (nqsets = i = 0; i < (sc)->params.nports; i++)
1559 nqsets += sc->port[i].nqsets;
1560
1561 for (i = 0; i < nqsets; ++i) {
1558 struct sge_qset *qs = &sc->sge.qs[i];
1559
1560 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_tsk);
1561 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_tsk);
1562 }
1563}
1564
1565
1566/**
1567 * free_tx_desc - reclaims Tx descriptors and their buffers
1569 *	@sc: the adapter
1569 * @q: the Tx queue to reclaim descriptors from
1570 * @n: the number of descriptors to reclaim
1571 *
1572 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
1573 * Tx buffers. Called with the Tx queue lock held.
1574 */
1575int
1576free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec)
1577{
1578 struct tx_sw_desc *d;
1579 unsigned int cidx = q->cidx;
1580 int nbufs = 0;
1581
1582#ifdef T3_TRACE
1583 T3_TRACE2(sc->tb[q->cntxt_id & 7],
1584 "reclaiming %u Tx descriptors at cidx %u", n, cidx);
1585#endif
1586 d = &q->sdesc[cidx];
1587
1588 while (n-- > 0) {
1589 DPRINTF("cidx=%d d=%p\n", cidx, d);
1590 if (d->m) {
1591 if (d->flags & TX_SW_DESC_MAPPED) {
1592 bus_dmamap_unload(q->entry_tag, d->map);
1593 bus_dmamap_destroy(q->entry_tag, d->map);
1594 d->flags &= ~TX_SW_DESC_MAPPED;
1595 }
1596 if (m_get_priority(d->m) == cidx) {
1597 m_vec[nbufs] = d->m;
1598 d->m = NULL;
1599 nbufs++;
1600 } else {
1601 printf("pri=%d cidx=%d\n", (int)m_get_priority(d->m), cidx);
1602 }
1603 }
1604 ++d;
1605 if (++cidx == q->size) {
1606 cidx = 0;
1607 d = q->sdesc;
1608 }
1609 }
1610 q->cidx = cidx;
1611
1612 return (nbufs);
1613}
1614
1615/**
1616 * is_new_response - check if a response is newly written
1617 * @r: the response descriptor
1618 * @q: the response queue
1619 *
1620 * Returns true if a response descriptor contains a yet unprocessed
1621 * response.
1622 */
1623static __inline int
1624is_new_response(const struct rsp_desc *r,
1625 const struct sge_rspq *q)
1626{
1627 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
1628}
1629
1630#define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1631#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1632 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
1633 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
1634 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
1635
1636/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
1637#define NOMEM_INTR_DELAY 2500
1638
1639/**
1640 * write_ofld_wr - write an offload work request
1641 * @adap: the adapter
1642 * @m: the packet to send
1643 * @q: the Tx queue
1644 * @pidx: index of the first Tx descriptor to write
1645 * @gen: the generation value to use
1646 * @ndesc: number of descriptors the packet will occupy
1647 *
1648 * Write an offload work request to send the supplied packet. The packet
1649 * data already carry the work request with most fields populated.
1650 */
1651static void
1652write_ofld_wr(adapter_t *adap, struct mbuf *m,
1653 struct sge_txq *q, unsigned int pidx,
1654 unsigned int gen, unsigned int ndesc,
1655 bus_dma_segment_t *segs, unsigned int nsegs)
1656{
1657 unsigned int sgl_flits, flits;
1658 struct work_request_hdr *from;
1659 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
1660 struct tx_desc *d = &q->desc[pidx];
1661 struct txq_state txqs;
1662
1663 if (immediate(m)) {
1664 q->sdesc[pidx].m = NULL;
1665 write_imm(d, m, m->m_len, gen);
1666 return;
1667 }
1668
1669 /* Only TX_DATA builds SGLs */
1670
1671 from = mtod(m, struct work_request_hdr *);
1672 memcpy(&d->flit[1], &from[1],
1673 (uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *) - sizeof(*from));
1674
1675 flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8;
1676 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
1677
1678 make_sgl(sgp, segs, nsegs);
1679 sgl_flits = sgl_len(nsegs);
1680
1681 txqs.gen = q->gen;
1682 txqs.pidx = q->pidx;
1683 txqs.compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1684 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
1685 from->wr_hi, from->wr_lo);
1686}
1687
1688/**
1689 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1690 * @m: the packet
1691 *
1692 * Returns the number of Tx descriptors needed for the given offload
1693 * packet. These packets are already fully constructed.
1694 */
1695static __inline unsigned int
1696calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
1697{
1698 unsigned int flits, cnt = 0;
1699
1700
1701 if (m->m_len <= WR_LEN)
1702 return 1; /* packet fits as immediate data */
1703
1704 if (m->m_flags & M_IOVEC)
1705 cnt = mtomv(m)->mv_count;
1706
1707 flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8; /* headers */
1708
1709 return flits_to_desc(flits + sgl_len(cnt));
1710}
1711
1712/**
1713 * ofld_xmit - send a packet through an offload queue
1714 * @adap: the adapter
1715 * @q: the Tx offload queue
1716 * @m: the packet
1717 *
1718 * Send an offload packet through an SGE offload queue.
1719 */
1720static int
1721ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1722{
1723 int ret;
1724 unsigned int pidx, gen, nsegs;
1725 unsigned int ndesc;
1726 struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
1727 bus_dma_segment_t segs[TX_MAX_SEGS];
1728 int i, cleaned;
1729 struct tx_sw_desc *stx = &q->sdesc[q->pidx];
1730
1731 mtx_lock(&q->lock);
1732 if ((ret = busdma_map_mbufs(&m, q, stx, segs, &nsegs)) != 0) {
1733 mtx_unlock(&q->lock);
1734 return (ret);
1735 }
1736 ndesc = calc_tx_descs_ofld(m, nsegs);
1737again: cleaned = reclaim_completed_tx(adap, q, TX_CLEAN_MAX_DESC, m_vec);
1738
1739 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
1740 if (__predict_false(ret)) {
1741 if (ret == 1) {
1742 m_set_priority(m, ndesc); /* save for restart */
1743 mtx_unlock(&q->lock);
1744 return NET_XMIT_CN;
1745 }
1746 goto again;
1747 }
1748
1749 gen = q->gen;
1750 q->in_use += ndesc;
1751 pidx = q->pidx;
1752 q->pidx += ndesc;
1753 if (q->pidx >= q->size) {
1754 q->pidx -= q->size;
1755 q->gen ^= 1;
1756 }
1757#ifdef T3_TRACE
1758 T3_TRACE5(adap->tb[q->cntxt_id & 7],
1759 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
1760 ndesc, pidx, skb->len, skb->len - skb->data_len,
1761 skb_shinfo(skb)->nr_frags);
1762#endif
1763 mtx_unlock(&q->lock);
1764
1765 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
1766 check_ring_tx_db(adap, q);
1767
1768 for (i = 0; i < cleaned; i++) {
1769 m_freem_vec(m_vec[i]);
1770 }
1771 return NET_XMIT_SUCCESS;
1772}
1773
1774/**
1775 * restart_offloadq - restart a suspended offload queue
1776 *	@qs: the queue set containing the offload queue
1777 *
1778 * Resumes transmission on a suspended Tx offload queue.
1779 */
1780static void
1781restart_offloadq(void *data, int npending)
1782{
1783
1784 struct mbuf *m;
1785 struct sge_qset *qs = data;
1786 struct sge_txq *q = &qs->txq[TXQ_OFLD];
1787 adapter_t *adap = qs->port->adapter;
1788 struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
1789 bus_dma_segment_t segs[TX_MAX_SEGS];
1790 int nsegs, i, cleaned;
1791 struct tx_sw_desc *stx = &q->sdesc[q->pidx];
1792
1793 mtx_lock(&q->lock);
1794again: cleaned = reclaim_completed_tx(adap, q, TX_CLEAN_MAX_DESC, m_vec);
1795
1796 while ((m = mbufq_peek(&q->sendq)) != NULL) {
1797 unsigned int gen, pidx;
1798 unsigned int ndesc = m_get_priority(m);
1799
1800 if (__predict_false(q->size - q->in_use < ndesc)) {
1801 setbit(&qs->txq_stopped, TXQ_OFLD);
1802 smp_mb();
1803
1804 if (should_restart_tx(q) &&
1805 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1806 goto again;
1807 q->stops++;
1808 break;
1809 }
1810
1811 gen = q->gen;
1812 q->in_use += ndesc;
1813 pidx = q->pidx;
1814 q->pidx += ndesc;
1815 if (q->pidx >= q->size) {
1816 q->pidx -= q->size;
1817 q->gen ^= 1;
1818 }
1819
1820 (void)mbufq_dequeue(&q->sendq);
1821 busdma_map_mbufs(&m, q, stx, segs, &nsegs);
1822 mtx_unlock(&q->lock);
1823 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
1824 mtx_lock(&q->lock);
1825 }
1826 mtx_unlock(&q->lock);
1827
1828#if USE_GTS
1829 set_bit(TXQ_RUNNING, &q->flags);
1830 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1831#endif
1832 t3_write_reg(adap, A_SG_KDOORBELL,
1833 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1834
1835 for (i = 0; i < cleaned; i++) {
1836 m_freem_vec(m_vec[i]);
1837 }
1838}
1839
1840/**
1841 * queue_set - return the queue set a packet should use
1842 * @m: the packet
1843 *
1844 * Maps a packet to the SGE queue set it should use. The desired queue
1845 * set is carried in bits 1-3 in the packet's priority.
1846 */
1847static __inline int
1848queue_set(const struct mbuf *m)
1849{
1850 return m_get_priority(m) >> 1;
1851}
1852
1853/**
1854 * is_ctrl_pkt - return whether an offload packet is a control packet
1855 * @m: the packet
1856 *
1857 * Determines whether an offload packet should use an OFLD or a CTRL
1858 * Tx queue. This is indicated by bit 0 in the packet's priority.
1859 */
1860static __inline int
1861is_ctrl_pkt(const struct mbuf *m)
1862{
1863 return m_get_priority(m) & 1;
1864}
1865
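/*
 * Illustrative sketch (added for exposition; not part of cxgb_sge.c): the
 * priority encoding decoded by queue_set() and is_ctrl_pkt() above -- bit 0
 * selects CTRL vs. OFLD, bits 1-3 select the queue set.  The value below is
 * hypothetical.
 */
#include <stdio.h>

int
main(void)
{
	unsigned int prio = (2 << 1) | 1;	/* queue set 2, control packet */

	printf("qset=%u is_ctrl=%u\n", prio >> 1, prio & 1);
	return (0);
}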
1866/**
1867 * t3_offload_tx - send an offload packet
1868 * @tdev: the offload device to send to
1869 * @m: the packet
1870 *
1871 * Sends an offload packet. We use the packet priority to select the
1872 * appropriate Tx queue as follows: bit 0 indicates whether the packet
1873 * should be sent as regular or control, bits 1-3 select the queue set.
1874 */
1875int
1876t3_offload_tx(struct toedev *tdev, struct mbuf *m)
1877{
1878 adapter_t *adap = tdev2adap(tdev);
1879 struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
1880
1881 if (__predict_false(is_ctrl_pkt(m)))
1882 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m);
1883
1884 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m);
1885}
1886
1887/**
1888 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1889 * @tdev: the offload device that will be receiving the packets
1890 * @q: the SGE response queue that assembled the bundle
1891 * @m: the partial bundle
1892 * @n: the number of packets in the bundle
1893 *
1894 * Delivers a (partial) bundle of Rx offload packets to an offload device.
1895 */
1896static __inline void
1897deliver_partial_bundle(struct toedev *tdev,
1898 struct sge_rspq *q,
1899 struct mbuf *mbufs[], int n)
1900{
1901 if (n) {
1902 q->offload_bundles++;
1903 cxgb_ofld_recv(tdev, mbufs, n);
1904 }
1905}
1906
1907static __inline int
1908rx_offload(struct toedev *tdev, struct sge_rspq *rq,
1909 struct mbuf *m, struct mbuf *rx_gather[],
1910 unsigned int gather_idx)
1911{
1912 rq->offload_pkts++;
1913 m->m_pkthdr.header = mtod(m, void *);
1914
1915 rx_gather[gather_idx++] = m;
1916 if (gather_idx == RX_BUNDLE_SIZE) {
1917 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1918 gather_idx = 0;
1919 rq->offload_bundles++;
1920 }
1921 return (gather_idx);
1922}
1923
1924static void
1925restart_tx(struct sge_qset *qs)
1926{
1927 struct adapter *sc = qs->port->adapter;
1928
1929 if (isset(&qs->txq_stopped, TXQ_OFLD) &&
1930 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
1931 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
1932 qs->txq[TXQ_OFLD].restarts++;
1933 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_tsk);
1934 }
1935 if (isset(&qs->txq_stopped, TXQ_CTRL) &&
1936 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
1937 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
1938 qs->txq[TXQ_CTRL].restarts++;
1939 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_tsk);
1940 }
1941}
1942
1943/**
1944 * t3_sge_alloc_qset - initialize an SGE queue set
1945 * @sc: the controller softc
1946 * @id: the queue set id
1947 * @nports: how many Ethernet ports will be using this queue set
1948 * @irq_vec_idx: the IRQ vector index for response queue interrupts
1949 * @p: configuration parameters for this queue set
1950 * @ntxq: number of Tx queues for the queue set
1951 * @pi: port info for queue set
1952 *
1953 * Allocate resources and initialize an SGE queue set. A queue set
1954 * comprises a response queue, two Rx free-buffer queues, and up to 3
1955 * Tx queues. The Tx queues are assigned roles in the order Ethernet
1956 * queue, offload queue, and control queue.
1957 */
1958int
1959t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
1960 const struct qset_params *p, int ntxq, struct port_info *pi)
1961{
1962 struct sge_qset *q = &sc->sge.qs[id];
1963 int i, ret = 0;
1964
1965 init_qset_cntxt(q, id);
1966
1967 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
1968 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
1969 &q->fl[0].desc, &q->fl[0].sdesc,
1970 &q->fl[0].desc_tag, &q->fl[0].desc_map,
1971 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
1972 printf("error %d from alloc ring fl0\n", ret);
1973 goto err;
1974 }
1975
1976 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
1977 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
1978 &q->fl[1].desc, &q->fl[1].sdesc,
1979 &q->fl[1].desc_tag, &q->fl[1].desc_map,
1980 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
1981 printf("error %d from alloc ring fl1\n", ret);
1982 goto err;
1983 }
1984
1985 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
1986 &q->rspq.phys_addr, &q->rspq.desc, NULL,
1987 &q->rspq.desc_tag, &q->rspq.desc_map,
1988 NULL, NULL)) != 0) {
1989 printf("error %d from alloc ring rspq\n", ret);
1990 goto err;
1991 }
1992
1993 for (i = 0; i < ntxq; ++i) {
1994 /*
1995 * The control queue always uses immediate data so does not
1996 * need to keep track of any mbufs.
1997 * XXX Placeholder for future TOE support.
1998 */
1999 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2000
2001 if ((ret = alloc_ring(sc, p->txq_size[i],
2002 sizeof(struct tx_desc), sz,
2003 &q->txq[i].phys_addr, &q->txq[i].desc,
2004 &q->txq[i].sdesc, &q->txq[i].desc_tag,
2005 &q->txq[i].desc_map,
2006 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2007 printf("error %d from alloc ring tx %i\n", ret, i);
2008 goto err;
2009 }
2010 mbufq_init(&q->txq[i].sendq);
2011 q->txq[i].gen = 1;
2012 q->txq[i].size = p->txq_size[i];
2013 mtx_init(&q->txq[i].lock, "t3 txq lock", NULL, MTX_DEF);
2017 snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d",
2018 device_get_unit(sc->dev), irq_vec_idx, i);
2019 MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF);
2020 }
2021
2022 TASK_INIT(&q->txq[TXQ_OFLD].qresume_tsk, 0, restart_offloadq, q);
2023 TASK_INIT(&q->txq[TXQ_CTRL].qresume_tsk, 0, restart_ctrlq, q);
2024
2025 q->fl[0].gen = q->fl[1].gen = 1;
2026 q->fl[0].size = p->fl_size;
2027 q->fl[1].size = p->jumbo_size;
2028
2029 q->rspq.gen = 1;
2030 q->rspq.size = p->rspq_size;
2025 mtx_init(&q->rspq.lock, "t3 rspq lock", NULL, MTX_DEF);
2026
2031
2032 q->txq[TXQ_ETH].stop_thres = nports *
2033 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2034
2035 q->fl[0].buf_size = MCLBYTES;
2036 q->fl[0].zone = zone_clust;
2037 q->fl[0].type = EXT_CLUSTER;
2038 q->fl[1].buf_size = MJUMPAGESIZE;
2039 q->fl[1].zone = zone_jumbop;
2040 q->fl[1].type = EXT_JUMBOP;
2041
2042 q->lro.enabled = lro_default;
2043
2044 mtx_lock(&sc->sge.reg_lock);
2045 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2046 q->rspq.phys_addr, q->rspq.size,
2047 q->fl[0].buf_size, 1, 0);
2048 if (ret) {
2049 printf("error %d from t3_sge_init_rspcntxt\n", ret);
2050 goto err_unlock;
2051 }
2052
2053 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2054 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2055 q->fl[i].phys_addr, q->fl[i].size,
2056 q->fl[i].buf_size, p->cong_thres, 1,
2057 0);
2058 if (ret) {
2059 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2060 goto err_unlock;
2061 }
2062 }
2063
2064 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2065 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2066 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2067 1, 0);
2068 if (ret) {
2069 printf("error %d from t3_sge_init_ecntxt\n", ret);
2070 goto err_unlock;
2071 }
2072
2073 if (ntxq > 1) {
2074 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2075 USE_GTS, SGE_CNTXT_OFLD, id,
2076 q->txq[TXQ_OFLD].phys_addr,
2077 q->txq[TXQ_OFLD].size, 0, 1, 0);
2078 if (ret) {
2079 printf("error %d from t3_sge_init_ecntxt\n", ret);
2080 goto err_unlock;
2081 }
2082 }
2083
2084 if (ntxq > 2) {
2085 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2086 SGE_CNTXT_CTRL, id,
2087 q->txq[TXQ_CTRL].phys_addr,
2088 q->txq[TXQ_CTRL].size,
2089 q->txq[TXQ_CTRL].token, 1, 0);
2090 if (ret) {
2091 printf("error %d from t3_sge_init_ecntxt\n", ret);
2092 goto err_unlock;
2093 }
2094 }
2095
2096 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2097 device_get_unit(sc->dev), irq_vec_idx);
2098 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2099
2100 mtx_unlock(&sc->sge.reg_lock);
2101 t3_update_qset_coalesce(q, p);
2102 q->port = pi;
2103
2104 refill_fl(sc, &q->fl[0], q->fl[0].size);
2105 refill_fl(sc, &q->fl[1], q->fl[1].size);
2106 refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2107
2108 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2109 V_NEWTIMER(q->rspq.holdoff_tmr));
2110
2111 return (0);
2112
2113err_unlock:
2114 mtx_unlock(&sc->sge.reg_lock);
2115err:
2116 t3_free_qset(sc, q);
2117
2118 return (ret);
2119}
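/*
 * Illustrative sketch only (not part of the driver): roughly how an
 * attach path would be expected to call t3_sge_alloc_qset() for one
 * queue set.  The qset id, IRQ vector index and the use of all three Tx
 * queues are assumptions for the sake of the example.
 */
#if 0
	ret = t3_sge_alloc_qset(sc, id, 1 /* nports */, irq_vec_idx,
	    &sc->params.sge.qset[id], 3 /* ETH, OFLD and CTRL */, pi);
	if (ret)
		device_printf(sc->dev, "could not allocate qset %d\n", id);
#endif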
2120
2121void
2122t3_rx_eth(struct port_info *pi, struct sge_rspq *rq, struct mbuf *m, int ethpad)
2123{
2124 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2125 struct ifnet *ifp = pi->ifp;
2126
2127 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
2128 if (&pi->adapter->port[cpl->iff] != pi)
2129 panic("bad port index %d m->m_data=%p\n", cpl->iff, mtod(m, uint8_t *));
2130
2131 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
2132 cpl->csum_valid && cpl->csum == 0xffff) {
2133		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|
2134		    CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
2135		rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2136 m->m_pkthdr.csum_data = 0xffff;
2137 }
2138 /*
2139 * XXX need to add VLAN support for 6.x
2140 */
2141#ifdef VLAN_SUPPORTED
2142 if (__predict_false(cpl->vlan_valid)) {
2143 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2144 m->m_flags |= M_VLANTAG;
2145 }
2146#endif
2147
2148 m->m_pkthdr.rcvif = ifp;
2149 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
2150 m_explode(m);
2151 /*
2152 * adjust after conversion to mbuf chain
2153 */
2154 m_adj(m, sizeof(*cpl) + ethpad);
2155
2156 (*ifp->if_input)(ifp, m);
2157}
2158
2159/**
2160 * get_packet - return the next ingress packet buffer from a free list
2161 * @adap: the adapter that received the packet
2162 * @drop_thres: # of remaining buffers before we start dropping packets
2163 * @qs: the qset that the SGE free list holding the packet belongs to
2164 * @m: the mbuf to be filled in with the packet data
2165 * @r: response descriptor
2166 *
2167 * Get the next packet from a free list and complete setup of the
2168 * mbuf. If the packet is small we make a copy and recycle the
2169 * original buffer, otherwise we use the original buffer itself. If a
2170 * positive drop threshold is supplied packets are dropped and their
2171 * buffers recycled if (a) the number of remaining buffers is under the
2172 * threshold and the packet is too big to copy, or (b) the packet should
2173 * be copied but there is no memory for the copy.
2174 */
2175static int
2176get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2177 struct mbuf *m, struct rsp_desc *r)
2178{
2179
2180 unsigned int len_cq = ntohl(r->len_cq);
2181 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2182 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2183 uint32_t len = G_RSPD_LEN(len_cq);
2184 uint32_t flags = ntohl(r->flags);
2185 uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2186 void *cl;
2187 int ret = 0;
2188
2189 prefetch(sd->cl);
2190
2191 fl->credits--;
2192 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2193
2194 if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2195 cl = mtod(m, void *);
2196 memcpy(cl, sd->cl, len);
2197 recycle_rx_buf(adap, fl, fl->cidx);
2198 } else {
2199 cl = sd->cl;
2200 bus_dmamap_unload(fl->entry_tag, sd->map);
2201 }
2202 switch(sopeop) {
2203 case RSPQ_SOP_EOP:
2204 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2205 if (cl == sd->cl)
2206 m_cljset(m, cl, fl->type);
2207 m->m_len = m->m_pkthdr.len = len;
2208 ret = 1;
2209 goto done;
2210 break;
2211 case RSPQ_NSOP_NEOP:
2212 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2213 ret = 0;
2214 break;
2215 case RSPQ_SOP:
2216 DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2217 m_iovinit(m);
2218 ret = 0;
2219 break;
2220 case RSPQ_EOP:
2221 DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2222 ret = 1;
2223 break;
2224 }
2225 m_iovappend(m, cl, fl->buf_size, len, 0);
2226
2227done:
2228 if (++fl->cidx == fl->size)
2229 fl->cidx = 0;
2230
2231 return (ret);
2232}
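/*
 * Illustrative sketch only (not part of the driver): a packet that spans
 * several free-list buffers is assembled by handing the same mbuf back to
 * get_packet() until it reports EOP, exactly as process_responses() does
 * below.  next_response() is a hypothetical placeholder.
 */
#if 0
	int eop = 0;

	while (!eop)
		eop = get_packet(adap, SGE_RX_DROP_THRES, qs, m,
		    next_response());
#endif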
2233
2234/**
2235 * handle_rsp_cntrl_info - handles control information in a response
2236 * @qs: the queue set corresponding to the response
2237 * @flags: the response control flags
2238 *
2239 * Handles the control information of an SGE response, such as GTS
2240 * indications and completion credits for the queue set's Tx queues.
2241 * HW coalesces credits; we don't do any extra SW coalescing.
2242 */
2243static __inline void
2244handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2245{
2246 unsigned int credits;
2247
2248#if USE_GTS
2249 if (flags & F_RSPD_TXQ0_GTS)
2250 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2251#endif
2252 credits = G_RSPD_TXQ0_CR(flags);
2253 if (credits) {
2254 qs->txq[TXQ_ETH].processed += credits;
2255 if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC)
2256 taskqueue_enqueue(qs->port->adapter->tq,
2257 &qs->port->timer_reclaim_task);
2258 }
2259
2260 credits = G_RSPD_TXQ2_CR(flags);
2261 if (credits)
2262 qs->txq[TXQ_CTRL].processed += credits;
2263
2264# if USE_GTS
2265 if (flags & F_RSPD_TXQ1_GTS)
2266 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2267# endif
2268 credits = G_RSPD_TXQ1_CR(flags);
2269 if (credits)
2270 qs->txq[TXQ_OFLD].processed += credits;
2271}
2272
2273static void
2274check_ring_db(adapter_t *adap, struct sge_qset *qs,
2275 unsigned int sleeping)
2276{
2277	;	/* XXX intentionally an empty stub */
2278}
2279
2280/**
2281 * process_responses - process responses from an SGE response queue
2282 * @adap: the adapter
2283 * @qs: the queue set to which the response queue belongs
2284 * @budget: how many responses can be processed in this round
2285 *
2286 * Process responses from an SGE response queue up to the supplied budget.
2287 * Responses include received packets as well as credits and other events
2288 * for the queues that belong to the response queue's queue set.
2289 * A negative budget is effectively unlimited.
2290 *
2291 * Additionally choose the interrupt holdoff time for the next interrupt
2292 * on this queue. If the system is under memory shortage use a fairly
2293 * long delay to help recovery.
2294 */
2295static int
2296process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2297{
2298 struct sge_rspq *rspq = &qs->rspq;
2299 struct rsp_desc *r = &rspq->desc[rspq->cidx];
2300 int budget_left = budget;
2301 unsigned int sleeping = 0;
2302 int lro = qs->lro.enabled;
2303 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
2304 int ngathered = 0;
2305#ifdef DEBUG
2306 static int last_holdoff = 0;
2307 if (rspq->holdoff_tmr != last_holdoff) {
2308 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2309 last_holdoff = rspq->holdoff_tmr;
2310 }
2311#endif
2312 rspq->next_holdoff = rspq->holdoff_tmr;
2313
2314 while (__predict_true(budget_left && is_new_response(r, rspq))) {
2315 int eth, eop = 0, ethpad = 0;
2316 uint32_t flags = ntohl(r->flags);
2317 uint32_t rss_csum = *(const uint32_t *)r;
2318 uint32_t rss_hash = r->rss_hdr.rss_hash_val;
2319
2320 eth = (r->rss_hdr.opcode == CPL_RX_PKT);
2321
2322 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2323 /* XXX */
2324 printf("async notification\n");
2325
2326 } else if (flags & F_RSPD_IMM_DATA_VALID) {
2327 struct mbuf *m = NULL;
2328 if (cxgb_debug)
2329 printf("IMM DATA VALID\n");
2330 if (rspq->m == NULL)
2331 rspq->m = m_gethdr(M_NOWAIT, MT_DATA);
2332 else
2333 m = m_gethdr(M_NOWAIT, MT_DATA);
2334
2335 if (rspq->m == NULL || m == NULL) {
2336 rspq->next_holdoff = NOMEM_INTR_DELAY;
2337 budget_left--;
2338 break;
2339 }
2340 get_imm_packet(adap, r, rspq->m, m);
2341 eop = 1;
2342 rspq->imm_data++;
2343 } else if (r->len_cq) {
2344 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
2345
2346 if (rspq->m == NULL)
2347 rspq->m = m_gethdr(M_NOWAIT, MT_DATA);
2348 if (rspq->m == NULL) {
2349 log(LOG_WARNING, "failed to get mbuf for packet\n");
2350 break;
2351 }
2352
2353 ethpad = 2;
2354 eop = get_packet(adap, drop_thresh, qs, rspq->m, r);
2355 } else {
2356 DPRINTF("pure response\n");
2357 rspq->pure_rsps++;
2358 }
2359
2360 if (flags & RSPD_CTRL_MASK) {
2361 sleeping |= flags & RSPD_GTS_MASK;
2362 handle_rsp_cntrl_info(qs, flags);
2363 }
2364
2365 r++;
2366 if (__predict_false(++rspq->cidx == rspq->size)) {
2367 rspq->cidx = 0;
2368 rspq->gen ^= 1;
2369 r = rspq->desc;
2370 }
2371
2372 prefetch(r);
2373 if (++rspq->credits >= (rspq->size / 4)) {
2374 refill_rspq(adap, rspq, rspq->credits);
2375 rspq->credits = 0;
2376 }
2377
2378 if (eop) {
2379 prefetch(mtod(rspq->m, uint8_t *));
2380 prefetch(mtod(rspq->m, uint8_t *) + L1_CACHE_BYTES);
2381
2382 if (eth) {
2383 t3_rx_eth_lro(adap, rspq, rspq->m, ethpad,
2384 rss_hash, rss_csum, lro);
2385
2386 rspq->m = NULL;
2387 } else {
2388 rspq->m->m_pkthdr.csum_data = rss_csum;
2389 /*
2390 * XXX size mismatch
2391 */
2392 m_set_priority(rspq->m, rss_hash);
2393
2394 ngathered = rx_offload(&adap->tdev, rspq, rspq->m,
2395 offload_mbufs, ngathered);
2396 }
2397#ifdef notyet
2398 taskqueue_enqueue(adap->tq, &adap->timer_reclaim_task);
2399#else
2400 __refill_fl(adap, &qs->fl[0]);
2401 __refill_fl(adap, &qs->fl[1]);
2402#endif
2403 }
2404 --budget_left;
2405 }
2406
2407 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
2408 t3_lro_flush(adap, qs, &qs->lro);
2409
2410 if (sleeping)
2411 check_ring_db(adap, qs, sleeping);
2412
2413 smp_mb(); /* commit Tx queue processed updates */
2414 if (__predict_false(qs->txq_stopped != 0))
2415 restart_tx(qs);
2416
2417 budget -= budget_left;
2418 return (budget);
2419}
2420
2421/*
2422 * A helper function that processes responses and issues GTS.
2423 */
2424static __inline int
2425process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
2426{
2427 int work;
2428 static int last_holdoff = 0;
2429
2430 work = process_responses(adap, rspq_to_qset(rq), -1);
2431
2432 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
2433 printf("next_holdoff=%d\n", rq->next_holdoff);
2434 last_holdoff = rq->next_holdoff;
2435 }
2436
2437 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2438 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2439 return work;
2440}
2441
2442
2443/*
2444 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2445 * Handles data events from SGE response queues as well as error and other
2446 * async events as they all use the same interrupt pin. We use one SGE
2447 * response queue per port in this mode and protect all response queues with
2448 * queue 0's lock.
2449 */
2450void
2451t3b_intr(void *data)
2452{
2453 uint32_t map;
2454 adapter_t *adap = data;
2455 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2456 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2457
2458 t3_write_reg(adap, A_PL_CLI, 0);
2459 map = t3_read_reg(adap, A_SG_DATA_INTR);
2460
2461 if (!map)
2462 return;
2463
2464 if (__predict_false(map & F_ERRINTR))
2465 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
2466
2467 mtx_lock(&q0->lock);
2468
2469 if (__predict_true(map & 1))
2470 process_responses_gts(adap, q0);
2471
2472 if (map & 2)
2473 process_responses_gts(adap, q1);
2474
2475 mtx_unlock(&q0->lock);
2476}
2477
2478/*
2479 * The MSI interrupt handler. This needs to handle data events from SGE
2480 * response queues as well as error and other async events as they all use
2481 * the same MSI vector. We use one SGE response queue per port in this mode
2482 * and protect all response queues with queue 0's lock.
2483 */
2484void
2485t3_intr_msi(void *data)
2486{
2487 adapter_t *adap = data;
2488 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2489 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2490 int new_packets = 0;
2491
2492 mtx_lock(&q0->lock);
2493 if (process_responses_gts(adap, q0)) {
2494 new_packets = 1;
2495 }
2496
2497 if (adap->params.nports == 2 &&
2498 process_responses_gts(adap, q1)) {
2499 new_packets = 1;
2500 }
2501
2502 mtx_unlock(&q0->lock);
2503 if (new_packets == 0)
2504 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
2505}
2506
2507void
2508t3_intr_msix(void *data)
2509{
2510 struct sge_qset *qs = data;
2511 adapter_t *adap = qs->port->adapter;
2512 struct sge_rspq *rspq = &qs->rspq;
2513
2514 mtx_lock(&rspq->lock);
2515 if (process_responses_gts(adap, rspq) == 0)
2516 rspq->unhandled_irqs++;
2517 mtx_unlock(&rspq->lock);
2518}
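/*
 * Illustrative sketch only (not part of the driver): how one of these
 * handlers is typically registered at attach time.  The seven-argument
 * bus_setup_intr() and the softc fields irq_res/intr_cookie are
 * assumptions for the sake of the example.
 */
#if 0
	error = bus_setup_intr(sc->dev, sc->irq_res,
	    INTR_TYPE_NET | INTR_MPSAFE, NULL, t3_intr_msi, sc,
	    &sc->intr_cookie);
#endif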
2519
2520/*
2521 * LRO enable sysctl handler: currently a no-op (LRO was broken by recent mbuf changes; see the LRO_WORKING guard below).
2522 */
2523static int
2524t3_lro_enable(SYSCTL_HANDLER_ARGS)
2525{
2526 adapter_t *sc;
2527 int i, j, enabled, err, nqsets = 0;
2528
2529#ifndef LRO_WORKING
2530 return (0);
2531#endif
2532
2533 sc = arg1;
2534 enabled = sc->sge.qs[0].lro.enabled;
2535 err = sysctl_handle_int(oidp, &enabled, arg2, req);
2536
2537 if (err != 0)
2538 return (err);
2539 if (enabled == sc->sge.qs[0].lro.enabled)
2540 return (0);
2541
2542 for (i = 0; i < sc->params.nports; i++)
2543 for (j = 0; j < sc->port[i].nqsets; j++)
2544 nqsets++;
2545
2546 for (i = 0; i < nqsets; i++)
2547 sc->sge.qs[i].lro.enabled = enabled;
2548
2549 return (0);
2550}
2551
2552static int
2553t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS)
2554{
2555 adapter_t *sc = arg1;
2556 struct qset_params *qsp = &sc->params.sge.qset[0];
2557 int coalesce_nsecs;
2558 struct sge_qset *qs;
2559 int i, j, err, nqsets = 0;
2560 struct mtx *lock;
2561
2562 coalesce_nsecs = qsp->coalesce_nsecs;
2563 err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req);
2564
2565 if (err != 0) {
2566 return (err);
2567 }
2568 if (coalesce_nsecs == qsp->coalesce_nsecs)
2569 return (0);
2570
2571 for (i = 0; i < sc->params.nports; i++)
2572 for (j = 0; j < sc->port[i].nqsets; j++)
2573 nqsets++;
2574
2575 coalesce_nsecs = max(100, coalesce_nsecs);
2576
2577 for (i = 0; i < nqsets; i++) {
2578 qs = &sc->sge.qs[i];
2579 qsp = &sc->params.sge.qset[i];
2580 qsp->coalesce_nsecs = coalesce_nsecs;
2581
2582 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
2583 &sc->sge.qs[0].rspq.lock;
2584
2585 mtx_lock(lock);
2586 t3_update_qset_coalesce(qs, qsp);
2587 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2588 V_NEWTIMER(qs->rspq.holdoff_tmr));
2589 mtx_unlock(lock);
2590 }
2591
2592 return (0);
2593}
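/*
 * Illustrative sketch only (not part of the driver): adjusting the
 * coalescing timer from a small userland C program.  The sysctl node name
 * ("dev.cxgbc.0.intr_coal") is an assumption; the actual name depends on
 * how the controller device attaches.
 */
#if 0
	int coal = 50000;	/* nanoseconds */

	if (sysctlbyname("dev.cxgbc.0.intr_coal", NULL, NULL,
	    &coal, sizeof(coal)) != 0)
		warn("sysctlbyname");
#endif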
2594
2595
2596void
2597t3_add_sysctls(adapter_t *sc)
2598{
2599 struct sysctl_ctx_list *ctx;
2600 struct sysctl_oid_list *children;
2601
2602 ctx = device_get_sysctl_ctx(sc->dev);
2603 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
2604
2605 /* random information */
2606 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
2607 "firmware_version",
2608 CTLFLAG_RD, &sc->fw_version,
2609 0, "firmware version");
2610
2611 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
2612 "enable_lro",
2613 CTLTYPE_INT|CTLFLAG_RW, sc,
2614 0, t3_lro_enable,
2615 "I", "enable large receive offload");
2616
2617 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
2618 "intr_coal",
2619 CTLTYPE_INT|CTLFLAG_RW, sc,
2620 0, t3_set_coalesce_nsecs,
2621 "I", "interrupt coalescing timer (ns)");
2622 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
2623 "enable_debug",
2624 CTLFLAG_RW, &cxgb_debug,
2625 0, "enable verbose debugging output");
2626
2627 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
2628 "collapse_free",
2629 CTLFLAG_RD, &collapse_free,
2630 0, "frees during collapse");
2631 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
2632 "mb_free_vec_free",
2633 CTLFLAG_RD, &mb_free_vec_free,
2634 0, "frees during mb_free_vec");
2635 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
2636 "collapse_mbufs",
2637 CTLFLAG_RW, &collapse_mbufs,
2638 0, "collapse mbuf chains into iovecs");
2639}
2640
2641/**
2642 * t3_get_desc - dump an SGE descriptor for debugging purposes
2643 * @qs: the queue set
2644 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
2645 * @idx: the descriptor index in the queue
2646 * @data: where to dump the descriptor contents
2647 *
2648 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
2649 * size of the descriptor.
2650 */
2651int
2652t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
2653 unsigned char *data)
2654{
2655 if (qnum >= 6)
2656 return (EINVAL);
2657
2658 if (qnum < 3) {
2659 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
2660			return (EINVAL);
2661 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
2662 return sizeof(struct tx_desc);
2663 }
2664
2665 if (qnum == 3) {
2666 if (!qs->rspq.desc || idx >= qs->rspq.size)
2667 return (EINVAL);
2668 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
2669 return sizeof(struct rsp_desc);
2670 }
2671
2672 qnum -= 4;
2673 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
2674 return (EINVAL);
2675 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
2676 return sizeof(struct rx_desc);
2677}
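/*
 * Illustrative sketch only (not part of the driver): dumping the first
 * descriptor of queue set 0's Ethernet Tx queue with t3_get_desc() while
 * debugging.
 */
#if 0
	unsigned char buf[sizeof(struct tx_desc)];
	int len;

	len = t3_get_desc(&sc->sge.qs[0], 0 /* Tx queue 0 */, 0, buf);
	if (len == sizeof(struct tx_desc))
		printf("dumped %d descriptor bytes\n", len);
#endif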