1/**************************************************************************
2
3Copyright (c) 2007, Chelsio Inc.
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10    this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13    contributors may be used to endorse or promote products derived from
14    this software without specific prior written permission.
15
16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26POSSIBILITY OF SUCH DAMAGE.
27
28***************************************************************************/
29
30#include <sys/cdefs.h>
31__KERNEL_RCSID(0, "$NetBSD: cxgb_sge.c,v 1.1 2010/03/21 21:11:13 jklos Exp $");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/kernel.h>
36#include <sys/conf.h>
37#include <sys/bus.h>
38#include <sys/queue.h>
39#include <sys/sysctl.h>
40
41#include <sys/proc.h>
42#include <sys/sched.h>
43#include <sys/systm.h>
44
45#include <netinet/in_systm.h>
46#include <netinet/in.h>
47#include <netinet/ip.h>
48#include <netinet/tcp.h>
49
50#include <dev/pci/pcireg.h>
51#include <dev/pci/pcivar.h>
52
53#ifdef CONFIG_DEFINED
54#include <cxgb_include.h>
55#else
56#include <dev/pci/cxgb/cxgb_include.h>
57#endif
58
59uint32_t collapse_free = 0;
60uint32_t mb_free_vec_free = 0;
61int      txq_fills = 0;
62int      collapse_mbufs = 0;
63static int bogus_imm = 0;
64#ifndef DISABLE_MBUF_IOVEC
65static int recycle_enable = 1;
66#endif
67
68#define USE_GTS 0
69
70#define SGE_RX_SM_BUF_SIZE  1536
71#define SGE_RX_DROP_THRES   16
72#define SGE_RX_COPY_THRES   128
73
74/*
75 * Period of the Tx buffer reclaim timer.  This timer does not need to run
76 * frequently as Tx buffers are usually reclaimed by new Tx packets.
77 */
78#define TX_RECLAIM_PERIOD       (hz >> 1)
79
80/*
81 * work request size in bytes
82 */
83#define WR_LEN (WR_FLITS * 8)
84
85/*
86 * Values for sge_txq.flags
87 */
88enum {
89    TXQ_RUNNING = 1 << 0,  /* fetch engine is running */
90    TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
91};
92
93struct tx_desc {
94    uint64_t    flit[TX_DESC_FLITS];
95} __packed;
96
97struct rx_desc {
98    uint32_t    addr_lo;
99    uint32_t    len_gen;
100    uint32_t    gen2;
101    uint32_t    addr_hi;
102} __packed;
103
104struct rsp_desc {               /* response queue descriptor */
105    struct rss_header   rss_hdr;
106    uint32_t        flags;
107    uint32_t        len_cq;
108    uint8_t         imm_data[47];
109    uint8_t         intr_gen;
110} __packed;
111
112#define RX_SW_DESC_MAP_CREATED  (1 << 0)
113#define TX_SW_DESC_MAP_CREATED  (1 << 1)
114#define RX_SW_DESC_INUSE        (1 << 3)
115#define TX_SW_DESC_MAPPED       (1 << 4)
116
117#define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
118#define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
119#define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
120#define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
121
122struct tx_sw_desc {                /* SW state per Tx descriptor */
123    struct mbuf *m;
124    bus_dma_segment_t segs[1];
125    bus_dmamap_t    map;
126    int     flags;
127};
128
129struct rx_sw_desc {                /* SW state per Rx descriptor */
130    void            *cl;
131    bus_dmamap_t    map;
132    int     flags;
133};
134
135struct txq_state {
136    unsigned int compl;
137    unsigned int gen;
138    unsigned int pidx;
139};
140
141/*
142 * Maps a number of flits to the number of Tx descriptors that can hold them.
143 * The formula is
144 *
145 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
146 *
147 * HW allows up to 4 descriptors to be combined into a WR.
148 */
149static uint8_t flit_desc_map[] = {
150    0,
151#if SGE_NUM_GENBITS == 1
152    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
153    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
154    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
155    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
156#elif SGE_NUM_GENBITS == 2
157    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
159    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
160    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
161#else
162# error "SGE_NUM_GENBITS must be 1 or 2"
163#endif
164};
165
166
167static int lro_default = 0;
168int cxgb_debug = 0;
169
170static void t3_free_qset(adapter_t *sc, struct sge_qset *q);
171static void sge_timer_cb(void *arg);
172static void sge_timer_reclaim(struct work *wk, void *arg);
173static void sge_txq_reclaim_handler(struct work *wk, void *arg);
174static int free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec);
175
176/**
177 *  reclaim_completed_tx - reclaims completed Tx descriptors
178 *  @adapter: the adapter
179 *  @q: the Tx queue to reclaim completed descriptors from
180 *
181 *  Reclaims Tx descriptors that the SGE has indicated it has processed,
182 *  and frees the associated buffers if possible.  Called with the Tx
183 *  queue's lock held.
184 */
185static __inline int
186reclaim_completed_tx(struct sge_txq *q, int nbufs, struct mbuf **mvec)
187{
188    int reclaimed, reclaim = desc_reclaimable(q);
189    int n = 0;
190
191    mtx_assert(&q->lock, MA_OWNED);
192    if (reclaim > 0) {
193        n = free_tx_desc(q, min(reclaim, nbufs), mvec);
194        reclaimed = min(reclaim, nbufs);
195        q->cleaned += reclaimed;
196        q->in_use -= reclaimed;
197    }
198    return (n);
199}
200
201/**
202 *  should_restart_tx - are there enough resources to restart a Tx queue?
203 *  @q: the Tx queue
204 *
205 *  Checks if there are enough descriptors to restart a suspended Tx queue.
206 */
207static __inline int
208should_restart_tx(const struct sge_txq *q)
209{
210    unsigned int r = q->processed - q->cleaned;
211
212    return q->in_use - r < (q->size >> 1);
213}
214
215/**
216 *  t3_sge_init - initialize SGE
217 *  @adap: the adapter
218 *  @p: the SGE parameters
219 *
220 *  Performs SGE initialization needed every time after a chip reset.
221 *  We do not initialize any of the queue sets here, instead the driver
222 *  top-level must request those individually.  We also do not enable DMA
223 *  here, that should be done after the queues have been set up.
224 */
225void
226t3_sge_init(adapter_t *adap, struct sge_params *p)
227{
228    u_int ctrl, ups;
229
230    ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
231
232    ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
233           F_CQCRDTCTRL |
234           V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
235           V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
236#if SGE_NUM_GENBITS == 1
237    ctrl |= F_EGRGENCTRL;
238#endif
239    if (adap->params.rev > 0) {
240        if (!(adap->flags & (USING_MSIX | USING_MSI)))
241            ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
242        ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL;
243    }
244    t3_write_reg(adap, A_SG_CONTROL, ctrl);
245    t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
246             V_LORCQDRBTHRSH(512));
247    t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
248    t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
249             V_TIMEOUT(200 * core_ticks_per_usec(adap)));
250    t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000);
251    t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
252    t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
253    t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
254    t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
255    t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
256}
257
258
259/**
260 *  sgl_len - calculates the size of an SGL of the given capacity
261 *  @n: the number of SGL entries
262 *
263 *  Calculates the number of flits needed for a scatter/gather list that
264 *  can hold the given number of entries.
265 */
266static __inline unsigned int
267sgl_len(unsigned int n)
268{
269    return ((3 * n) / 2 + (n & 1));
270}
271
272/**
273 *  get_imm_packet - return the next ingress packet buffer from a response
274 *  @resp: the response descriptor containing the packet data
275 *
276 *  Return a packet containing the immediate data of the given response.
277 */
278#ifdef DISABLE_MBUF_IOVEC
279static __inline int
280get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct t3_mbuf_hdr *mh)
281{
282    struct mbuf *m;
283    int len;
284    uint32_t flags = ntohl(resp->flags);
285    uint8_t sopeop = G_RSPD_SOP_EOP(flags);
286
287    /*
288     * would be a firmware bug
289     */
290    if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP)
291        return (0);
292
293    m = m_gethdr(M_NOWAIT, MT_DATA);
294    len = G_RSPD_LEN(ntohl(resp->len_cq));
295
296    if (m) {
297        MH_ALIGN(m, IMMED_PKT_SIZE);
298        memcpy(m->m_data, resp->imm_data, IMMED_PKT_SIZE);
299        m->m_len = len;
300
301        switch (sopeop) {
302        case RSPQ_SOP_EOP:
303            mh->mh_head = mh->mh_tail = m;
304            m->m_pkthdr.len = len;
305            m->m_flags |= M_PKTHDR;
306            break;
307        case RSPQ_EOP:
308            m->m_flags &= ~M_PKTHDR;
309            mh->mh_head->m_pkthdr.len += len;
310            mh->mh_tail->m_next = m;
311            mh->mh_tail = m;
312            break;
313        }
314    }
315    return (m != NULL);
316}
317
318#else
319static int
320get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl, uint32_t flags)
321{
322    int len, error;
323    uint8_t sopeop = G_RSPD_SOP_EOP(flags);
324
325    /*
326     * would be a firmware bug
327     */
328    len = G_RSPD_LEN(ntohl(resp->len_cq));
329    if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP) {
330        if (cxgb_debug)
331            device_printf(sc->dev, "unexpected value sopeop=%d flags=0x%x len=%din get_imm_packet\n", sopeop, flags, len);
332        bogus_imm++;
333        return (EINVAL);
334    }
335    error = 0;
336    switch (sopeop) {
337    case RSPQ_SOP_EOP:
338        m->m_len = m->m_pkthdr.len = len;
339        memcpy(mtod(m, uint8_t *), resp->imm_data, len);
340        break;
341    case RSPQ_EOP:
342        memcpy(cl, resp->imm_data, len);
343        m_iovappend(m, cl, MSIZE, len, 0);
344        break;
345    default:
346        bogus_imm++;
347        error = EINVAL;
348    }
349
350    return (error);
351}
352#endif
353
354static __inline u_int
355flits_to_desc(u_int n)
356{
357    return (flit_desc_map[n]);
358}
359
360void
361t3_sge_err_intr_handler(adapter_t *adapter)
362{
363    unsigned int v, status;
364
365
366    status = t3_read_reg(adapter, A_SG_INT_CAUSE);
367
368    if (status & F_RSPQCREDITOVERFOW)
369        CH_ALERT(adapter, "SGE response queue credit overflow\n");
370
371    if (status & F_RSPQDISABLED) {
372        v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
373
374        CH_ALERT(adapter,
375             "packet delivered to disabled response queue (0x%x)\n",
376             (v >> S_RSPQ0DISABLED) & 0xff);
377    }
378
379    t3_write_reg(adapter, A_SG_INT_CAUSE, status);
380    if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED))
381        t3_fatal_err(adapter);
382}
383
384void
385t3_sge_prep(adapter_t *adap, struct sge_params *p)
386{
387    int i;
388
389    /* XXX Does ETHER_ALIGN need to be accounted for here? */
390    p->max_pkt_size = MJUM16BYTES - sizeof(struct cpl_rx_data);
391
392    for (i = 0; i < SGE_QSETS; ++i) {
393        struct qset_params *q = p->qset + i;
394
395        q->polling = adap->params.rev > 0;
396
397        if (adap->params.nports > 2)
398            q->coalesce_nsecs = 50000;
399        else
400            q->coalesce_nsecs = 5000;
401
402        q->rspq_size = RSPQ_Q_SIZE;
403        q->fl_size = FL_Q_SIZE;
404        q->jumbo_size = JUMBO_Q_SIZE;
405        q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
406        q->txq_size[TXQ_OFLD] = 1024;
407        q->txq_size[TXQ_CTRL] = 256;
408        q->cong_thres = 0;
409    }
410}
411
412int
413t3_sge_alloc(adapter_t *sc)
414{
415    /* The parent tag. */
416    sc->parent_dmat = sc->pa.pa_dmat;
417
418    /*
419     * DMA tag for normal sized RX frames
420     */
421    sc->rx_dmat = sc->pa.pa_dmat;
422
423    /*
424     * DMA tag for jumbo sized RX frames.
425     */
426    sc->rx_jumbo_dmat = sc->pa.pa_dmat;
427
428    /*
429     * DMA tag for TX frames.
430     */
431    sc->tx_dmat = sc->pa.pa_dmat;
432
433    return (0);
434}
435
436int
437t3_sge_free(struct adapter * sc)
438{
439    return (0);
440}
441
442void
443t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
444{
445
446    qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U);
447    qs->rspq.polling = 0 /* p->polling */;
448}
449
450/**
451 *  refill_fl - refill an SGE free-buffer list
452 *  @sc: the controller softc
453 *  @q: the free-list to refill
454 *  @n: the number of new buffers to allocate
455 *
456 *  (Re)populate an SGE free-buffer list with up to @n new packet buffers.
457 *  The caller must assure that @n does not exceed the queue's capacity.
458 */
459static void
460refill_fl(adapter_t *sc, struct sge_fl *q, int n)
461{
462    struct rx_sw_desc *sd = &q->sdesc[q->pidx];
463    struct rx_desc *d = &q->desc[q->pidx];
464    void *cl;
465    int err;
466
467    while (n--) {
468        /*
469         * We only allocate a cluster, mbuf allocation happens after rx
470         */
471        if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0)
472        {
473            err = bus_dmamap_create(sc->pa.pa_dmat,
474                        q->buf_size, 1, q->buf_size, 0,
475                        BUS_DMA_ALLOCNOW, &sd->map);
476            if (err != 0)
477            {
478                log(LOG_WARNING, "failure in refill_fl\n");
479                return;
480            }
481            sd->flags |= RX_SW_DESC_MAP_CREATED;
482        }
483        cl = malloc(q->buf_size, M_DEVBUF, M_NOWAIT);
484        if (cl == NULL)
485        {
486            log(LOG_WARNING, "Failed to allocate cluster\n");
487            break;
488        }
489        err = bus_dmamap_load(sc->pa.pa_dmat, sd->map, cl, q->buf_size, NULL, BUS_DMA_NOWAIT);
490        if (err)
491        {
492            log(LOG_WARNING, "failure in refill_fl\n");
493            free(cl, M_DEVBUF);
494            return;
495        }
496
497        sd->flags |= RX_SW_DESC_INUSE;
498        sd->cl = cl;
499        d->addr_lo = htobe32(sd->map->dm_segs[0].ds_addr & 0xffffffff);
500        d->addr_hi = htobe32(((uint64_t)sd->map->dm_segs[0].ds_addr>>32) & 0xffffffff);
501        d->len_gen = htobe32(V_FLD_GEN1(q->gen));
502        d->gen2 = htobe32(V_FLD_GEN2(q->gen));
503
504        d++;
505        sd++;
506
507        if (++q->pidx == q->size) {
508            q->pidx = 0;
509            q->gen ^= 1;
510            sd = q->sdesc;
511            d = q->desc;
512        }
513        q->credits++;
514    }
515
516    t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
517}
518
519
520/**
521 *  free_rx_bufs - free the Rx buffers on an SGE free list
522 *  @sc: the controle softc
523 *  @q: the SGE free list to clean up
524 *
525 *  Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
526 *  this queue should be stopped before calling this function.
527 */
528static void
529free_rx_bufs(adapter_t *sc, struct sge_fl *q)
530{
531    u_int cidx = q->cidx;
532
533    while (q->credits--) {
534        struct rx_sw_desc *d = &q->sdesc[cidx];
535
536        if (d->flags & RX_SW_DESC_INUSE) {
537	    bus_dmamap_unload(q->entry_tag, d->map);
538	    bus_dmamap_destroy(q->entry_tag, d->map);
539	    d->map = NULL;
540            free(d->cl, M_DEVBUF);
541	    d->cl = NULL;
542        }
543        d->cl = NULL;
544        if (++cidx == q->size)
545            cidx = 0;
546    }
547}
548
549static __inline void
550__refill_fl(adapter_t *adap, struct sge_fl *fl)
551{
552    refill_fl(adap, fl, min(16U, fl->size - fl->credits));
553}
554
555#ifndef DISABLE_MBUF_IOVEC
556/**
557 *  recycle_rx_buf - recycle a receive buffer
558 *  @adapter: the adapter
559 *  @q: the SGE free list
560 *  @idx: index of buffer to recycle
561 *
562 *  Recycles the specified buffer on the given free list by adding it at
563 *  the next available slot on the list.
564 */
565static void
566recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
567{
568    struct rx_desc *from = &q->desc[idx];
569    struct rx_desc *to   = &q->desc[q->pidx];
570
571    q->sdesc[q->pidx] = q->sdesc[idx];
572    to->addr_lo = from->addr_lo;        // already big endian
573    to->addr_hi = from->addr_hi;        // likewise
574    wmb();
575    to->len_gen = htobe32(V_FLD_GEN1(q->gen));
576    to->gen2 = htobe32(V_FLD_GEN2(q->gen));
577    q->credits++;
578
579    if (++q->pidx == q->size) {
580        q->pidx = 0;
581        q->gen ^= 1;
582    }
583    t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
584}
585#endif
586
587static int
588alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
589    bus_addr_t *phys,
590    void *desc, void *sdesc, bus_dma_tag_t *tag,
591    bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
592{
593    size_t len = nelem * elem_size;
594    void *s = NULL;
595    void *p = NULL;
596    int err;
597    bus_dma_segment_t phys_seg;
598
599    int nsegs;
600
601    *tag = sc->pa.pa_dmat;
602
603    /* allocate wired physical memory for DMA descriptor array */
604    err = bus_dmamem_alloc(*tag, len, PAGE_SIZE, 0, &phys_seg, 1,
605                &nsegs, BUS_DMA_NOWAIT);
606    if (err != 0)
607    {
608        device_printf(sc->dev, "Cannot allocate descriptor memory\n");
609        return (ENOMEM);
610    }
611    *phys = phys_seg.ds_addr;
612
613    /* map physical address to kernel virtual address */
614    err = bus_dmamem_map(*tag, &phys_seg, 1, len, &p,
615                BUS_DMA_NOWAIT|BUS_DMA_COHERENT);
616    if (err != 0)
617    {
618        device_printf(sc->dev, "Cannot map descriptor memory\n");
619        return (ENOMEM);
620    }
621
622    memset(p, 0, len);
623    *(void **)desc = p;
624
625    if (sw_size)
626    {
627        len = nelem * sw_size;
628        s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
629        *(void **)sdesc = s;
630    }
631
632    if (parent_entry_tag == NULL)
633        return (0);
634    *entry_tag = sc->pa.pa_dmat;
635
636    return (0);
637}
638
639static void
640sge_slow_intr_handler(struct work *wk, void *arg)
641{
642    adapter_t *sc = arg;
643
644    t3_slow_intr_handler(sc);
645}
646
647/**
648 *  sge_timer_cb - perform periodic maintenance of an SGE qset
649 *  @data: the SGE queue set to maintain
650 *
651 *  Runs periodically from a timer to perform maintenance of an SGE queue
652 *  set.  It performs two tasks:
653 *
654 *  a) Cleans up any completed Tx descriptors that may still be pending.
655 *  Normal descriptor cleanup happens when new packets are added to a Tx
656 *  queue so this timer is relatively infrequent and does any cleanup only
657 *  if the Tx queue has not seen any new packets in a while.  We make a
658 *  best effort attempt to reclaim descriptors, in that we don't wait
659 *  around if we cannot get a queue's lock (which most likely is because
660 *  someone else is queueing new packets and so will also handle the clean
661 *  up).  Since control queues use immediate data exclusively we don't
662 *  bother cleaning them up here.
663 *
664 *  b) Replenishes Rx queues that have run out due to memory shortage.
665 *  Normally new Rx buffers are added when existing ones are consumed but
666 *  when out of memory a queue can become empty.  We try to add only a few
667 *  buffers here, the queue will be replenished fully as these new buffers
668 *  are used up if memory shortage has subsided.
669 *
670 *  c) Return coalesced response queue credits in case a response queue is
671 *  starved.
672 *
673 *  d) Ring doorbells for T304 tunnel queues since we have seen doorbell
674 *  fifo overflows and the FW doesn't implement any recovery scheme yet.
675 */
676
677static void
678sge_timer_cb(void *arg)
679{
680    adapter_t *sc = arg;
681    struct port_info *p;
682    struct sge_qset *qs;
683    struct sge_txq  *txq;
684    int i, j;
685    int reclaim_eth, reclaim_ofl, refill_rx;
686
687    for (i = 0; i < sc->params.nports; i++)
688        for (j = 0; j < sc->port[i].nqsets; j++) {
689            qs = &sc->sge.qs[i + j];
690            txq = &qs->txq[0];
691            reclaim_eth = txq[TXQ_ETH].processed - txq[TXQ_ETH].cleaned;
692            reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
693            refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
694                (qs->fl[1].credits < qs->fl[1].size));
695            if (reclaim_eth || reclaim_ofl || refill_rx) {
696                p = &sc->port[i];
697                workqueue_enqueue(p->timer_reclaim_task.wq, &p->timer_reclaim_task.w, NULL);
698                break;
699            }
700        }
701    if (sc->params.nports > 2) {
702        int k;
703
704        for_each_port(sc, k) {
705            struct port_info *pi = &sc->port[k];
706
707            t3_write_reg(sc, A_SG_KDOORBELL,
708                     F_SELEGRCNTX |
709                     (FW_TUNNEL_SGEEC_START + pi->first_qset));
710        }
711    }
712    if (sc->open_device_map != 0)
713        callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
714}
715
716/*
717 * This is meant to be a catch-all function to keep sge state private
718 * to sge.c
719 *
720 */
721int
722t3_sge_init_adapter(adapter_t *sc)
723{
724    callout_init(&sc->sge_timer_ch, 0);
725    callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
726    sc->slow_intr_task.name = "sge_slow_intr";
727    sc->slow_intr_task.func = sge_slow_intr_handler;
728    sc->slow_intr_task.context = sc;
729    kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &sc->slow_intr_task, NULL, "cxgb_make_task");
730    return (0);
731}
732
733int
734t3_sge_init_port(struct port_info *p)
735{
736    p->timer_reclaim_task.name = "sge_timer_reclaim";
737    p->timer_reclaim_task.func = sge_timer_reclaim;
738    p->timer_reclaim_task.context = p;
739    kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &p->timer_reclaim_task, NULL, "cxgb_make_task");
740
741    return (0);
742}
743
744void
745t3_sge_deinit_sw(adapter_t *sc)
746{
747    callout_drain(&sc->sge_timer_ch);
748}
749
750/**
751 *  refill_rspq - replenish an SGE response queue
752 *  @adapter: the adapter
753 *  @q: the response queue to replenish
754 *  @credits: how many new responses to make available
755 *
756 *  Replenishes a response queue by making the supplied number of responses
757 *  available to HW.
758 */
759static __inline void
760refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
761{
762
763    /* mbufs are allocated on demand when a rspq entry is processed. */
764    t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
765             V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
766}
767
768static __inline void
769sge_txq_reclaim_(struct sge_txq *txq)
770{
771    int reclaimable, i, n;
772    struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
773    struct port_info *p;
774
775    p = txq->port;
776reclaim_more:
777    n = 0;
778    reclaimable = desc_reclaimable(txq);
779    if (reclaimable > 0 && mtx_trylock(&txq->lock)) {
780        n = reclaim_completed_tx(txq, TX_CLEAN_MAX_DESC, m_vec);
781        mtx_unlock(&txq->lock);
782    }
783    if (n == 0)
784        return;
785
786    for (i = 0; i < n; i++) {
787        m_freem_vec(m_vec[i]);
788    }
789    if (p && p->ifp->if_drv_flags & IFF_DRV_OACTIVE &&
790        txq->size - txq->in_use >= TX_START_MAX_DESC) {
791        txq_fills++;
792        p->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
793        workqueue_enqueue(p->start_task.wq, &p->start_task.w, NULL);
794    }
795
796    if (n)
797        goto reclaim_more;
798}
799
800static void
801sge_txq_reclaim_handler(struct work *wk, void *arg)
802{
803    struct sge_txq *q = arg;
804
805    sge_txq_reclaim_(q);
806}
807
808static void
809sge_timer_reclaim(struct work *wk, void *arg)
810{
811    struct port_info *p = arg;
812    int i, nqsets = p->nqsets;
813    adapter_t *sc = p->adapter;
814    struct sge_qset *qs;
815    struct sge_txq *txq;
816    struct mtx *lock;
817
818    for (i = 0; i < nqsets; i++) {
819        qs = &sc->sge.qs[i];
820        txq = &qs->txq[TXQ_ETH];
821        sge_txq_reclaim_(txq);
822
823        txq = &qs->txq[TXQ_OFLD];
824        sge_txq_reclaim_(txq);
825
826        lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
827                &sc->sge.qs[0].rspq.lock;
828
829        if (mtx_trylock(lock)) {
830            /* XXX currently assume that we are *NOT* polling */
831            uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
832
833            if (qs->fl[0].credits < qs->fl[0].size - 16)
834                __refill_fl(sc, &qs->fl[0]);
835            if (qs->fl[1].credits < qs->fl[1].size - 16)
836                __refill_fl(sc, &qs->fl[1]);
837
838            if (status & (1 << qs->rspq.cntxt_id)) {
839                if (qs->rspq.credits) {
840                    refill_rspq(sc, &qs->rspq, 1);
841                    qs->rspq.credits--;
842                    t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
843                        1 << qs->rspq.cntxt_id);
844                }
845            }
846            mtx_unlock(lock);
847        }
848    }
849}
850
851/**
852 *  init_qset_cntxt - initialize an SGE queue set context info
853 *  @qs: the queue set
854 *  @id: the queue set id
855 *
856 *  Initializes the TIDs and context ids for the queues of a queue set.
857 */
858static void
859init_qset_cntxt(struct sge_qset *qs, u_int id)
860{
861
862    qs->rspq.cntxt_id = id;
863    qs->fl[0].cntxt_id = 2 * id;
864    qs->fl[1].cntxt_id = 2 * id + 1;
865    qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
866    qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
867    qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
868    qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
869    qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
870}
871
872
873static void
874txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
875{
876    txq->in_use += ndesc;
877    /*
878     * XXX we don't handle stopping of queue
879     * presumably start handles this when we bump against the end
880     */
881    txqs->gen = txq->gen;
882    txq->unacked += ndesc;
883    txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3);
884    txq->unacked &= 7;
885    txqs->pidx = txq->pidx;
886    txq->pidx += ndesc;
887
888    if (txq->pidx >= txq->size) {
889        txq->pidx -= txq->size;
890        txq->gen ^= 1;
891    }
892
893}
894
895/**
896 *  calc_tx_descs - calculate the number of Tx descriptors for a packet
897 *  @m: the packet mbufs
898 *      @nsegs: the number of segments
899 *
900 *  Returns the number of Tx descriptors needed for the given Ethernet
901 *  packet.  Ethernet packets require addition of WR and CPL headers.
902 */
903static __inline unsigned int
904calc_tx_descs(const struct mbuf *m, int nsegs)
905{
906    unsigned int flits;
907
908    if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
909        return 1;
910
911    flits = sgl_len(nsegs) + 2;
912#ifdef TSO_SUPPORTED
913    if  (m->m_pkthdr.csum_flags & (CSUM_TSO))
914        flits++;
915#endif
916    return flits_to_desc(flits);
917}
918
919static unsigned int
920busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
921    struct tx_sw_desc *stx, bus_dma_segment_t *segs, int *nsegs)
922{
923    struct mbuf *m0;
924    int err, pktlen;
925    int i, total_len;
926
927    m0 = *m;
928    pktlen = m0->m_pkthdr.len;
929
930    m0 = *m;
931    i = 0;
932    total_len = 0;
933    while (m0)
934    {
935        i++;
936        total_len += m0->m_len;
937        m0 = m0->m_next;
938    }
939    err = bus_dmamap_create(txq->entry_tag, total_len, TX_MAX_SEGS, total_len, 0, BUS_DMA_NOWAIT, &stx->map);
940    if (err)
941        return (err);
942    err = bus_dmamap_load_mbuf(txq->entry_tag, stx->map, *m, 0);
943    if (err)
944        return (err);
945    // feed out the physical mappings
946    *nsegs = stx->map->dm_nsegs;
947    for (i=0; i<*nsegs; i++)
948    {
949        segs[i] = stx->map->dm_segs[i];
950    }
951#ifdef DEBUG
952    if (err) {
953        int n = 0;
954        struct mbuf *mtmp = m0;
955        while(mtmp) {
956            n++;
957            mtmp = mtmp->m_next;
958        }
959        printf("map_mbufs: bus_dmamap_load_mbuf_sg failed with %d - pkthdr.len==%d nmbufs=%d\n",
960            err, m0->m_pkthdr.len, n);
961    }
962#endif
963    if (err == EFBIG) {
964        /* Too many segments, try to defrag */
965        m0 = m_defrag(m0, M_DONTWAIT);
966        if (m0 == NULL) {
967            m_freem(*m);
968            *m = NULL;
969            return (ENOBUFS);
970        }
971        *m = m0;
972        INT3; // XXXXXXXXXXXXXXXXXX like above!
973    }
974
975    if (err == ENOMEM) {
976        return (err);
977    }
978
979    if (err) {
980        if (cxgb_debug)
981            printf("map failure err=%d pktlen=%d\n", err, pktlen);
982        m_freem_vec(m0);
983        *m = NULL;
984        return (err);
985    }
986
987    bus_dmamap_sync(txq->entry_tag, stx->map, 0, pktlen, BUS_DMASYNC_PREWRITE);
988    stx->flags |= TX_SW_DESC_MAPPED;
989
990    return (0);
991}
992
993/**
994 *  make_sgl - populate a scatter/gather list for a packet
995 *  @sgp: the SGL to populate
996 *  @segs: the packet dma segments
997 *  @nsegs: the number of segments
998 *
999 *  Generates a scatter/gather list for the buffers that make up a packet
1000 *  and returns the SGL size in 8-byte words.  The caller must size the SGL
1001 *  appropriately.
1002 */
1003static __inline void
1004make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1005{
1006    int i, idx;
1007
1008    for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) {
1009        if (i && idx == 0)
1010            ++sgp;
1011
1012        sgp->len[idx] = htobe32(segs[i].ds_len);
1013        sgp->addr[idx] = htobe64(segs[i].ds_addr);
1014    }
1015
1016    if (idx)
1017        sgp->len[idx] = 0;
1018}
1019
1020/**
1021 *  check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1022 *  @adap: the adapter
1023 *  @q: the Tx queue
1024 *
1025 *  Ring the doorbel if a Tx queue is asleep.  There is a natural race,
1026 *  where the HW is going to sleep just after we checked, however,
1027 *  then the interrupt handler will detect the outstanding TX packet
1028 *  and ring the doorbell for us.
1029 *
1030 *  When GTS is disabled we unconditionally ring the doorbell.
1031 */
1032static __inline void
1033check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
1034{
1035#if USE_GTS
1036    clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1037    if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1038        set_bit(TXQ_LAST_PKT_DB, &q->flags);
1039#ifdef T3_TRACE
1040        T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1041              q->cntxt_id);
1042#endif
1043        t3_write_reg(adap, A_SG_KDOORBELL,
1044                 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1045    }
1046#else
1047    wmb();            /* write descriptors before telling HW */
1048    t3_write_reg(adap, A_SG_KDOORBELL,
1049             F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1050#endif
1051}
1052
1053static __inline void
1054wr_gen2(struct tx_desc *d, unsigned int gen)
1055{
1056#if SGE_NUM_GENBITS == 2
1057    d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
1058#endif
1059}
1060
1061
1062
1063/**
1064 *  write_wr_hdr_sgl - write a WR header and, optionally, SGL
1065 *  @ndesc: number of Tx descriptors spanned by the SGL
1066 *  @txd: first Tx descriptor to be written
1067 *  @txqs: txq state (generation and producer index)
1068 *  @txq: the SGE Tx queue
1069 *  @sgl: the SGL
1070 *  @flits: number of flits to the start of the SGL in the first descriptor
1071 *  @sgl_flits: the SGL size in flits
1072 *  @wr_hi: top 32 bits of WR header based on WR type (big endian)
1073 *  @wr_lo: low 32 bits of WR header based on WR type (big endian)
1074 *
1075 *  Write a work request header and an associated SGL.  If the SGL is
1076 *  small enough to fit into one Tx descriptor it has already been written
1077 *  and we just need to write the WR header.  Otherwise we distribute the
1078 *  SGL across the number of descriptors it spans.
1079 */
1080
1081static void
1082write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1083    const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1084    unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1085{
1086
1087    struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1088    struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
1089
1090    if (__predict_true(ndesc == 1)) {
1091        wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1092            V_WR_SGLSFLT(flits)) | wr_hi;
1093        wmb();
1094        wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1095            V_WR_GEN(txqs->gen)) | wr_lo;
1096        /* XXX gen? */
1097        wr_gen2(txd, txqs->gen);
1098    } else {
1099        unsigned int ogen = txqs->gen;
1100        const uint64_t *fp = (const uint64_t *)sgl;
1101        struct work_request_hdr *wp = wrp;
1102
1103        wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1104            V_WR_SGLSFLT(flits)) | wr_hi;
1105
1106        while (sgl_flits) {
1107            unsigned int avail = WR_FLITS - flits;
1108
1109            if (avail > sgl_flits)
1110                avail = sgl_flits;
1111            memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1112            sgl_flits -= avail;
1113            ndesc--;
1114            if (!sgl_flits)
1115                break;
1116
1117            fp += avail;
1118            txd++;
1119            txsd++;
1120            if (++txqs->pidx == txq->size) {
1121                txqs->pidx = 0;
1122                txqs->gen ^= 1;
1123                txd = txq->desc;
1124                txsd = txq->sdesc;
1125            }
1126
1127            /*
1128             * when the head of the mbuf chain
1129             * is freed all clusters will be freed
1130             * with it
1131             */
1132            txsd->m = NULL;
1133            wrp = (struct work_request_hdr *)txd;
1134            wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1135                V_WR_SGLSFLT(1)) | wr_hi;
1136            wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1137                    sgl_flits + 1)) |
1138                V_WR_GEN(txqs->gen)) | wr_lo;
1139            wr_gen2(txd, txqs->gen);
1140            flits = 1;
1141        }
1142        wrp->wr_hi |= htonl(F_WR_EOP);
1143        wmb();
1144        wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1145        wr_gen2((struct tx_desc *)wp, ogen);
1146    }
1147}
1148
1149
1150/* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
1151#define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
1152
1153int
1154t3_encap(struct port_info *p, struct mbuf **m, int *free_it)
1155{
1156    adapter_t *sc;
1157    struct mbuf *m0;
1158    struct sge_qset *qs;
1159    struct sge_txq *txq;
1160    struct tx_sw_desc *stx;
1161    struct txq_state txqs;
1162    unsigned int ndesc, flits, cntrl, mlen;
1163    int err, nsegs, tso_info = 0;
1164
1165    struct work_request_hdr *wrp;
1166    struct tx_sw_desc *txsd;
1167    struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
1168    bus_dma_segment_t segs[TX_MAX_SEGS];
1169    uint32_t wr_hi, wr_lo, sgl_flits;
1170
1171    struct tx_desc *txd;
1172    struct cpl_tx_pkt *cpl;
1173
1174    m0 = *m;
1175    sc = p->adapter;
1176
1177    DPRINTF("t3_encap port_id=%d qsidx=%d ", p->port_id, p->first_qset);
1178
1179    /* port_id=1 qsid=1 txpkt_intf=2 tx_chan=0 */
1180
1181    qs = &sc->sge.qs[p->first_qset];
1182
1183    txq = &qs->txq[TXQ_ETH];
1184    stx = &txq->sdesc[txq->pidx];
1185    txd = &txq->desc[txq->pidx];
1186    cpl = (struct cpl_tx_pkt *)txd;
1187    mlen = m0->m_pkthdr.len;
1188    cpl->len = htonl(mlen | 0x80000000);
1189
1190    DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", mlen, p->txpkt_intf, p->tx_chan);
1191    /*
1192     * XXX handle checksum, TSO, and VLAN here
1193     *
1194     */
1195    cntrl = V_TXPKT_INTF(p->txpkt_intf);
1196
1197    /*
1198     * XXX need to add VLAN support for 6.x
1199     */
1200#ifdef VLAN_SUPPORTED
1201    if (m0->m_flags & M_VLANTAG)
1202        cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
1203    if  (m0->m_pkthdr.csum_flags & (CSUM_TSO))
1204        tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1205#endif
1206    if (tso_info) {
1207        int eth_type;
1208        struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *) cpl;
1209        struct ip *ip;
1210        struct tcphdr *tcp;
1211        char *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack? */
1212
1213        txd->flit[2] = 0;
1214        cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1215        hdr->cntrl = htonl(cntrl);
1216
1217        if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
1218            pkthdr = &tmp[0];
1219            m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr);
1220        } else {
1221            pkthdr = mtod(m0, char *);
1222        }
1223
1224#ifdef VLAN_SUPPORTED
1225        if (__predict_false(m0->m_flags & M_VLANTAG)) {
1226            eth_type = CPL_ETH_II_VLAN;
1227            ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
1228                ETHER_VLAN_ENCAP_LEN);
1229        } else {
1230            eth_type = CPL_ETH_II;
1231            ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
1232        }
1233#else
1234        eth_type = CPL_ETH_II;
1235        ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
1236#endif
1237        tcp = (struct tcphdr *)((uint8_t *)ip +
1238            sizeof(*ip));
1239
1240        tso_info |= V_LSO_ETH_TYPE(eth_type) |
1241                V_LSO_IPHDR_WORDS(ip->ip_hl) |
1242                V_LSO_TCPHDR_WORDS(tcp->th_off);
1243        hdr->lso_info = htonl(tso_info);
1244        flits = 3;
1245    } else {
1246        cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1247        cpl->cntrl = htonl(cntrl);
1248
1249        if (mlen <= WR_LEN - sizeof(*cpl)) {
1250            txq_prod(txq, 1, &txqs);
1251            txq->sdesc[txqs.pidx].m = NULL;
1252
1253            if (m0->m_len == m0->m_pkthdr.len)
1254                memcpy(&txd->flit[2], mtod(m0, uint8_t *), mlen);
1255            else
1256                m_copydata(m0, 0, mlen, (void *)&txd->flit[2]);
1257
1258            *free_it = 1;
1259            flits = (mlen + 7) / 8 + 2;
1260            cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1261                      V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1262                      F_WR_SOP | F_WR_EOP | txqs.compl);
1263            wmb();
1264            cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
1265                V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1266
1267            wr_gen2(txd, txqs.gen);
1268            check_ring_tx_db(sc, txq);
1269            return (0);
1270        }
1271        flits = 2;
1272    }
1273
1274    wrp = (struct work_request_hdr *)txd;
1275
1276    if ((err = busdma_map_mbufs(m, txq, stx, segs, &nsegs)) != 0) {
1277        return (err);
1278    }
1279    m0 = *m;
1280    ndesc = calc_tx_descs(m0, nsegs);
1281
1282    sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1283    make_sgl(sgp, segs, nsegs);
1284
1285    sgl_flits = sgl_len(nsegs);
1286
1287    DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
1288    txq_prod(txq, ndesc, &txqs);
1289    txsd = &txq->sdesc[txqs.pidx];
1290    wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1291    wr_lo = htonl(V_WR_TID(txq->token));
1292    txsd->m = m0;
1293    m_set_priority(m0, txqs.pidx);
1294
1295    write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
1296    check_ring_tx_db(p->adapter, txq);
1297
1298    return (0);
1299}
1300
1301
1302/**
1303 *  write_imm - write a packet into a Tx descriptor as immediate data
1304 *  @d: the Tx descriptor to write
1305 *  @m: the packet
1306 *  @len: the length of packet data to write as immediate data
1307 *  @gen: the generation bit value to write
1308 *
1309 *  Writes a packet as immediate data into a Tx descriptor.  The packet
1310 *  contains a work request at its beginning.  We must write the packet
1311 *  carefully so the SGE doesn't read accidentally before it's written in
1312 *  its entirety.
1313 */
1314static __inline void
1315write_imm(struct tx_desc *d, struct mbuf *m,
1316      unsigned int len, unsigned int gen)
1317{
1318    struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
1319    struct work_request_hdr *to = (struct work_request_hdr *)d;
1320
1321    memcpy(&to[1], &from[1], len - sizeof(*from));
1322    to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1323                    V_WR_BCNTLFLT(len & 7));
1324    wmb();
1325    to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1326                    V_WR_LEN((len + 7) / 8));
1327    wr_gen2(d, gen);
1328    m_freem(m);
1329}
1330
1331/**
1332 *  check_desc_avail - check descriptor availability on a send queue
1333 *  @adap: the adapter
1334 *  @q: the TX queue
1335 *  @m: the packet needing the descriptors
1336 *  @ndesc: the number of Tx descriptors needed
1337 *  @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1338 *
1339 *  Checks if the requested number of Tx descriptors is available on an
1340 *  SGE send queue.  If the queue is already suspended or not enough
1341 *  descriptors are available the packet is queued for later transmission.
1342 *  Must be called with the Tx queue locked.
1343 *
1344 *  Returns 0 if enough descriptors are available, 1 if there aren't
1345 *  enough descriptors and the packet has been queued, and 2 if the caller
1346 *  needs to retry because there weren't enough descriptors at the
1347 *  beginning of the call but some freed up in the mean time.
1348 */
1349static __inline int
1350check_desc_avail(adapter_t *adap, struct sge_txq *q,
1351         struct mbuf *m, unsigned int ndesc,
1352         unsigned int qid)
1353{
1354    /*
1355     * XXX We currently only use this for checking the control queue
1356     * the control queue is only used for binding qsets which happens
1357     * at init time so we are guaranteed enough descriptors
1358     */
1359    if (__predict_false(!mbufq_empty(&q->sendq))) {
1360addq_exit:  mbufq_tail(&q->sendq, m);
1361        return 1;
1362    }
1363    if (__predict_false(q->size - q->in_use < ndesc)) {
1364
1365        struct sge_qset *qs = txq_to_qset(q, qid);
1366
1367        setbit(&qs->txq_stopped, qid);
1368        smp_mb();
1369
1370        if (should_restart_tx(q) &&
1371            test_and_clear_bit(qid, &qs->txq_stopped))
1372            return 2;
1373
1374        q->stops++;
1375        goto addq_exit;
1376    }
1377    return 0;
1378}
1379
1380
1381/**
1382 *  reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1383 *  @q: the SGE control Tx queue
1384 *
1385 *  This is a variant of reclaim_completed_tx() that is used for Tx queues
1386 *  that send only immediate data (presently just the control queues) and
1387 *  thus do not have any mbufs
1388 */
1389static __inline void
1390reclaim_completed_tx_imm(struct sge_txq *q)
1391{
1392    unsigned int reclaim = q->processed - q->cleaned;
1393
1394    mtx_assert(&q->lock, MA_OWNED);
1395
1396    q->in_use -= reclaim;
1397    q->cleaned += reclaim;
1398}
1399
1400static __inline int
1401immediate(const struct mbuf *m)
1402{
1403    return m->m_len <= WR_LEN  && m->m_pkthdr.len <= WR_LEN ;
1404}
1405
1406/**
1407 *  ctrl_xmit - send a packet through an SGE control Tx queue
1408 *  @adap: the adapter
1409 *  @q: the control queue
1410 *  @m: the packet
1411 *
1412 *  Send a packet through an SGE control Tx queue.  Packets sent through
1413 *  a control queue must fit entirely as immediate data in a single Tx
1414 *  descriptor and have no page fragments.
1415 */
1416static int
1417ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1418{
1419    int ret;
1420    struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1421
1422    if (__predict_false(!immediate(m))) {
1423        m_freem(m);
1424        return 0;
1425    }
1426
1427    wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1428    wrp->wr_lo = htonl(V_WR_TID(q->token));
1429
1430    mtx_lock(&q->lock);
1431again:  reclaim_completed_tx_imm(q);
1432
1433    ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1434    if (__predict_false(ret)) {
1435        if (ret == 1) {
1436            mtx_unlock(&q->lock);
1437            return (-1);
1438        }
1439        goto again;
1440    }
1441
1442    write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1443
1444    q->in_use++;
1445    if (++q->pidx >= q->size) {
1446        q->pidx = 0;
1447        q->gen ^= 1;
1448    }
1449    mtx_unlock(&q->lock);
1450    wmb();
1451    t3_write_reg(adap, A_SG_KDOORBELL,
1452             F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1453    return (0);
1454}
1455
1456
1457/**
1458 *  restart_ctrlq - restart a suspended control queue
1459 *  @qs: the queue set cotaining the control queue
1460 *
1461 *  Resumes transmission on a suspended Tx control queue.
1462 */
1463static void
1464restart_ctrlq(struct work *wk, void *data)
1465{
1466    struct mbuf *m;
1467    struct sge_qset *qs = (struct sge_qset *)data;
1468    struct sge_txq *q = &qs->txq[TXQ_CTRL];
1469    adapter_t *adap = qs->port->adapter;
1470
1471    mtx_lock(&q->lock);
1472again:  reclaim_completed_tx_imm(q);
1473
1474    while (q->in_use < q->size &&
1475           (m = mbufq_dequeue(&q->sendq)) != NULL) {
1476
1477        write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1478
1479        if (++q->pidx >= q->size) {
1480            q->pidx = 0;
1481            q->gen ^= 1;
1482        }
1483        q->in_use++;
1484    }
1485    if (!mbufq_empty(&q->sendq)) {
1486        setbit(&qs->txq_stopped, TXQ_CTRL);
1487        smp_mb();
1488
1489        if (should_restart_tx(q) &&
1490            test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1491            goto again;
1492        q->stops++;
1493    }
1494    mtx_unlock(&q->lock);
1495    t3_write_reg(adap, A_SG_KDOORBELL,
1496             F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1497}
1498
1499
1500/*
1501 * Send a management message through control queue 0
1502 */
1503int
1504t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1505{
1506    return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
1507}
1508
1509/**
1510 *  free_qset - free the resources of an SGE queue set
1511 *  @sc: the controller owning the queue set
1512 *  @q: the queue set
1513 *
1514 *  Release the HW and SW resources associated with an SGE queue set, such
1515 *  as HW contexts, packet buffers, and descriptor rings.  Traffic to the
1516 *  queue set must be quiesced prior to calling this.
1517 */
1518static void
1519t3_free_qset(adapter_t *sc, struct sge_qset *q)
1520{
1521    int i;
1522
1523    for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
1524        if (q->fl[i].desc) {
1525            mtx_lock(&sc->sge.reg_lock);
1526            t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
1527            mtx_unlock(&sc->sge.reg_lock);
1528            bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
1529		INT3;
1530//            bus_dmamem_free(q->fl[i].desc_tag, &q->fl[i].phys_addr, 1);
1531            // XXXXXXXXXXX destroy DMA tags????
1532        }
1533        if (q->fl[i].sdesc) {
1534            free_rx_bufs(sc, &q->fl[i]);
1535            free(q->fl[i].sdesc, M_DEVBUF);
1536        }
1537    }
1538
1539    for (i = 0; i < SGE_TXQ_PER_SET; i++) {
1540        if (q->txq[i].desc) {
1541            mtx_lock(&sc->sge.reg_lock);
1542            t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
1543            mtx_unlock(&sc->sge.reg_lock);
1544            bus_dmamap_unload(q->txq[i].desc_tag,
1545                    q->txq[i].desc_map);
1546		INT3;
1547//            bus_dmamem_free(q->txq[i].desc_tag, &q->txq[i].phys_addr, 1);
1548            // XXXXXXXXXXX destroy DMA tags????  And the lock?!??!
1549
1550        }
1551        if (q->txq[i].sdesc) {
1552            free(q->txq[i].sdesc, M_DEVBUF);
1553        }
1554    }
1555
1556    if (q->rspq.desc) {
1557        mtx_lock(&sc->sge.reg_lock);
1558        t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
1559        mtx_unlock(&sc->sge.reg_lock);
1560
1561        bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
1562	INT3;
1563//        bus_dmamem_free(q->rspq.desc_tag, &q->rspq.phys_addr, 1);
1564        // XXXXXXXXXXX destroy DMA tags???? and the LOCK ?!?!?
1565    }
1566
1567    memset(q, 0, sizeof(*q));
1568}
1569
1570/**
1571 *  t3_free_sge_resources - free SGE resources
1572 *  @sc: the adapter softc
1573 *
1574 *  Frees resources used by the SGE queue sets.
1575 */
1576void
1577t3_free_sge_resources(adapter_t *sc)
1578{
1579    int i, nqsets;
1580
1581    for (nqsets = i = 0; i < (sc)->params.nports; i++)
1582        nqsets += sc->port[i].nqsets;
1583
1584    for (i = 0; i < nqsets; ++i)
1585        t3_free_qset(sc, &sc->sge.qs[i]);
1586}
1587
1588/**
1589 *  t3_sge_start - enable SGE
1590 *  @sc: the controller softc
1591 *
1592 *  Enables the SGE for DMAs.  This is the last step in starting packet
1593 *  transfers.
1594 */
1595void
1596t3_sge_start(adapter_t *sc)
1597{
1598    t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
1599}
1600
1601/**
1602 *  t3_sge_stop - disable SGE operation
1603 *  @sc: the adapter
1604 *
1605 *  Disables the DMA engine.  This can be called in emeregencies (e.g.,
1606 *  from error interrupts) or from normal process context.  In the latter
1607 *  case it also disables any pending queue restart tasklets.  Note that
1608 *  if it is called in interrupt context it cannot disable the restart
1609 *  tasklets as it cannot wait, however the tasklets will have no effect
1610 *  since the doorbells are disabled and the driver will call this again
1611 *  later from process context, at which time the tasklets will be stopped
1612 *  if they are still running.
1613 */
1614void
1615t3_sge_stop(adapter_t *sc)
1616{
1617    int i, nqsets;
1618
1619    t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
1620
1621    for (nqsets = i = 0; i < (sc)->params.nports; i++)
1622        nqsets += sc->port[i].nqsets;
1623
1624    for (i = 0; i < nqsets; ++i) {
1625    }
1626}
1627
1628
1629/**
1630 *  free_tx_desc - reclaims Tx descriptors and their buffers
1631 *  @adapter: the adapter
1632 *  @q: the Tx queue to reclaim descriptors from
1633 *  @n: the number of descriptors to reclaim
1634 *
1635 *  Reclaims Tx descriptors from an SGE Tx queue and frees the associated
1636 *  Tx buffers.  Called with the Tx queue lock held.
1637 */
1638int
1639free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec)
1640{
1641    struct tx_sw_desc *d;
1642    unsigned int cidx = q->cidx;
1643    int nbufs = 0;
1644
1645#ifdef T3_TRACE
1646    T3_TRACE2(sc->tb[q->cntxt_id & 7],
1647          "reclaiming %u Tx descriptors at cidx %u", n, cidx);
1648#endif
1649    d = &q->sdesc[cidx];
1650
1651    while (n-- > 0) {
1652        DPRINTF("cidx=%d d=%p\n", cidx, d);
1653        if (d->m) {
1654            if (d->flags & TX_SW_DESC_MAPPED) {
1655                bus_dmamap_unload(q->entry_tag, d->map);
1656                bus_dmamap_destroy(q->entry_tag, d->map);
1657                d->flags &= ~TX_SW_DESC_MAPPED;
1658            }
1659            if (m_get_priority(d->m) == cidx) {
1660                m_vec[nbufs] = d->m;
1661                d->m = NULL;
1662                nbufs++;
1663            } else {
1664                printf("pri=%d cidx=%d\n", (int)m_get_priority(d->m), cidx);
1665            }
1666        }
1667        ++d;
1668        if (++cidx == q->size) {
1669            cidx = 0;
1670            d = q->sdesc;
1671        }
1672    }
1673    q->cidx = cidx;
1674
1675    return (nbufs);
1676}
1677
1678/**
1679 *  is_new_response - check if a response is newly written
1680 *  @r: the response descriptor
1681 *  @q: the response queue
1682 *
1683 *  Returns true if a response descriptor contains a yet unprocessed
1684 *  response.
1685 */
1686static __inline int
1687is_new_response(const struct rsp_desc *r,
1688    const struct sge_rspq *q)
1689{
1690    return (r->intr_gen & F_RSPD_GEN2) == q->gen;
1691}
1692
1693#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1694#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1695            V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
1696            V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
1697            V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
1698
1699/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
1700#define NOMEM_INTR_DELAY 2500
1701
1702/**
1703 *  write_ofld_wr - write an offload work request
1704 *  @adap: the adapter
1705 *  @m: the packet to send
1706 *  @q: the Tx queue
1707 *  @pidx: index of the first Tx descriptor to write
1708 *  @gen: the generation value to use
1709 *  @ndesc: number of descriptors the packet will occupy
1710 *
1711 *  Write an offload work request to send the supplied packet.  The packet
1712 *  data already carry the work request with most fields populated.
1713 */
1714static void
1715write_ofld_wr(adapter_t *adap, struct mbuf *m,
1716    struct sge_txq *q, unsigned int pidx,
1717    unsigned int gen, unsigned int ndesc,
1718    bus_dma_segment_t *segs, unsigned int nsegs)
1719{
1720    unsigned int sgl_flits, flits;
1721    struct work_request_hdr *from;
1722    struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
1723    struct tx_desc *d = &q->desc[pidx];
1724    struct txq_state txqs;
1725
1726    if (immediate(m)) {
1727        q->sdesc[pidx].m = NULL;
1728        write_imm(d, m, m->m_len, gen);
1729        return;
1730    }
1731
1732    /* Only TX_DATA builds SGLs */
1733
1734    from = mtod(m, struct work_request_hdr *);
1735    INT3; ///  DEBUG this???
1736    flits = 3; // XXXXXXXXXXXXXX
1737
1738    sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
1739
1740    make_sgl(sgp, segs, nsegs);
1741    sgl_flits = sgl_len(nsegs);
1742
1743    txqs.gen = q->gen;
1744    txqs.pidx = q->pidx;
1745    txqs.compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1746    write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
1747        from->wr_hi, from->wr_lo);
1748}
1749
1750/**
1751 *  calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1752 *  @m: the packet
1753 *
1754 *  Returns the number of Tx descriptors needed for the given offload
1755 *  packet.  These packets are already fully constructed.
1756 */
1757static __inline unsigned int
1758calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
1759{
1760    unsigned int flits, cnt = 0;
1761
1762
1763    if (m->m_len <= WR_LEN)
1764        return 1;                 /* packet fits as immediate data */
1765
1766    if (m->m_flags & M_IOVEC)
1767        cnt = mtomv(m)->mv_count;
1768
1769    INT3; // Debug this????
1770    flits = 3; // XXXXXXXXX
1771
1772    return flits_to_desc(flits + sgl_len(cnt));
1773}

/**
 *  ofld_xmit - send a packet through an offload queue
 *  @adap: the adapter
 *  @q: the Tx offload queue
 *  @m: the packet
 *
 *  Send an offload packet through an SGE offload queue.
 */
static int
ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
{
    int ret, nsegs;
    unsigned int ndesc;
    unsigned int pidx, gen;
    struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
    bus_dma_segment_t segs[TX_MAX_SEGS];
    int i, cleaned;
    struct tx_sw_desc *stx = &q->sdesc[q->pidx];

    mtx_lock(&q->lock);
    if ((ret = busdma_map_mbufs(&m, q, stx, segs, &nsegs)) != 0) {
        mtx_unlock(&q->lock);
        return (ret);
    }
    ndesc = calc_tx_descs_ofld(m, nsegs);
again:  cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec);

    ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
    if (__predict_false(ret)) {
        if (ret == 1) {
            m_set_priority(m, ndesc);     /* save for restart */
            mtx_unlock(&q->lock);
            return (EINTR);
        }
        goto again;
    }

    gen = q->gen;
    q->in_use += ndesc;
    pidx = q->pidx;
    q->pidx += ndesc;
    if (q->pidx >= q->size) {
        q->pidx -= q->size;
        q->gen ^= 1;
    }
#ifdef T3_TRACE
    /* XXX trace arguments converted from the Linux skb originals to mbuf terms */
    T3_TRACE5(adap->tb[q->cntxt_id & 7],
          "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, nsegs %u",
          ndesc, pidx, m->m_pkthdr.len, m->m_len, nsegs);
#endif
    mtx_unlock(&q->lock);

    write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
    check_ring_tx_db(adap, q);

    for (i = 0; i < cleaned; i++) {
        m_freem_vec(m_vec[i]);
    }
    return (0);
}
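
/*
 * A minimal sketch (not called by the driver) of the producer-index
 * advance idiom used by ofld_xmit() above and restart_offloadq() below:
 * the index wraps modulo the ring size and the generation bit flips on
 * every wrap so the hardware can distinguish old descriptors from new.
 */
static __inline void
txq_advance_sketch(struct sge_txq *q, unsigned int ndesc)
{
    q->in_use += ndesc;
    q->pidx += ndesc;
    if (q->pidx >= q->size) {
        q->pidx -= q->size;
        q->gen ^= 1;
    }
}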

/**
 *  restart_offloadq - restart a suspended offload queue
 *  @wk: the workqueue work structure (unused)
 *  @data: the queue set containing the offload queue
 *
 *  Resumes transmission on a suspended Tx offload queue.
 */
static void
restart_offloadq(struct work *wk, void *data)
{
    struct mbuf *m;
    struct sge_qset *qs = data;
    struct sge_txq *q = &qs->txq[TXQ_OFLD];
    adapter_t *adap = qs->port->adapter;
    struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
    bus_dma_segment_t segs[TX_MAX_SEGS];
    int nsegs, i, cleaned;
    struct tx_sw_desc *stx = &q->sdesc[q->pidx];

    mtx_lock(&q->lock);
again:  cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec);

    while ((m = mbufq_peek(&q->sendq)) != NULL) {
        unsigned int gen, pidx;
        unsigned int ndesc = m_get_priority(m);

        if (__predict_false(q->size - q->in_use < ndesc)) {
            setbit(&qs->txq_stopped, TXQ_OFLD);
            smp_mb();

            if (should_restart_tx(q) &&
                test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
                goto again;
            q->stops++;
            break;
        }

        gen = q->gen;
        q->in_use += ndesc;
        pidx = q->pidx;
        q->pidx += ndesc;
        if (q->pidx >= q->size) {
            q->pidx -= q->size;
            q->gen ^= 1;
        }

        (void)mbufq_dequeue(&q->sendq);
        busdma_map_mbufs(&m, q, stx, segs, &nsegs);
        mtx_unlock(&q->lock);
        write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
        mtx_lock(&q->lock);
    }
    mtx_unlock(&q->lock);

#if USE_GTS
    set_bit(TXQ_RUNNING, &q->flags);
    set_bit(TXQ_LAST_PKT_DB, &q->flags);
#endif
    t3_write_reg(adap, A_SG_KDOORBELL,
             F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));

    for (i = 0; i < cleaned; i++) {
        m_freem_vec(m_vec[i]);
    }
}

/**
 *  queue_set - return the queue set a packet should use
 *  @m: the packet
 *
 *  Maps a packet to the SGE queue set it should use.  The desired queue
 *  set is carried in bits 1-3 in the packet's priority.
 */
static __inline int
queue_set(const struct mbuf *m)
{
    return m_get_priority(m) >> 1;
}

/**
 *  is_ctrl_pkt - return whether an offload packet is a control packet
 *  @m: the packet
 *
 *  Determines whether an offload packet should use an OFLD or a CTRL
 *  Tx queue.  This is indicated by bit 0 in the packet's priority.
 */
static __inline int
is_ctrl_pkt(const struct mbuf *m)
{
    return m_get_priority(m) & 1;
}

/**
 *  t3_offload_tx - send an offload packet
 *  @tdev: the offload device to send to
 *  @m: the packet
 *
 *  Sends an offload packet.  We use the packet priority to select the
 *  appropriate Tx queue as follows: bit 0 indicates whether the packet
 *  should be sent as regular or control, bits 1-3 select the queue set.
 */
int
t3_offload_tx(struct toedev *tdev, struct mbuf *m)
{
    adapter_t *adap = tdev2adap(tdev);
    struct sge_qset *qs = &adap->sge.qs[queue_set(m)];

    if (__predict_false(is_ctrl_pkt(m)))
        return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m);

    return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m);
}
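
/*
 * Illustrative sketch only (the helper name is made up and nothing in
 * the driver calls it): how a caller could encode its queue choice in
 * the mbuf priority before handing a packet to t3_offload_tx().  Bit 0
 * selects the CTRL vs. OFLD queue and bits 1-3 select the queue set,
 * matching is_ctrl_pkt() and queue_set() above.
 */
static __inline void
encode_ofld_priority_sketch(struct mbuf *m, unsigned int qset, int ctrl)
{
    m_set_priority(m, (qset << 1) | (ctrl ? 1 : 0));
}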

static void
restart_tx(struct sge_qset *qs)
{
    if (isset(&qs->txq_stopped, TXQ_OFLD) &&
        should_restart_tx(&qs->txq[TXQ_OFLD]) &&
        test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
        qs->txq[TXQ_OFLD].restarts++;
        workqueue_enqueue(qs->txq[TXQ_OFLD].qresume_task.wq,
            &qs->txq[TXQ_OFLD].qresume_task.w, NULL);
    }
    if (isset(&qs->txq_stopped, TXQ_CTRL) &&
        should_restart_tx(&qs->txq[TXQ_CTRL]) &&
        test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
        qs->txq[TXQ_CTRL].restarts++;
        workqueue_enqueue(qs->txq[TXQ_CTRL].qresume_task.wq,
            &qs->txq[TXQ_CTRL].qresume_task.w, NULL);
    }
}

/**
 *  t3_sge_alloc_qset - initialize an SGE queue set
 *  @sc: the controller softc
 *  @id: the queue set id
 *  @nports: how many Ethernet ports will be using this queue set
 *  @irq_vec_idx: the IRQ vector index for response queue interrupts
 *  @p: configuration parameters for this queue set
 *  @ntxq: number of Tx queues for the queue set
 *  @pi: port info for queue set
 *
 *  Allocate resources and initialize an SGE queue set.  A queue set
 *  comprises a response queue, two Rx free-buffer queues, and up to 3
 *  Tx queues.  The Tx queues are assigned roles in the order Ethernet
 *  queue, offload queue, and control queue.
 */
int
t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
          const struct qset_params *p, int ntxq, struct port_info *pi)
{
    struct sge_qset *q = &sc->sge.qs[id];
    int i, ret = 0;

    init_qset_cntxt(q, id);

    if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
            sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
            &q->fl[0].desc, &q->fl[0].sdesc,
            &q->fl[0].desc_tag, &q->fl[0].desc_map,
            sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
        goto err;
    }

    if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
            sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
            &q->fl[1].desc, &q->fl[1].sdesc,
            &q->fl[1].desc_tag, &q->fl[1].desc_map,
            sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
        goto err;
    }

    if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
            &q->rspq.phys_addr, &q->rspq.desc, NULL,
            &q->rspq.desc_tag, &q->rspq.desc_map,
            NULL, NULL)) != 0) {
        goto err;
    }

    for (i = 0; i < ntxq; ++i) {
        /*
         * The control queue always uses immediate data so does not
         * need to keep track of any mbufs.
         * XXX Placeholder for future TOE support.
         */
        size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);

        if ((ret = alloc_ring(sc, p->txq_size[i],
                sizeof(struct tx_desc), sz,
                &q->txq[i].phys_addr, &q->txq[i].desc,
                &q->txq[i].sdesc, &q->txq[i].desc_tag,
                &q->txq[i].desc_map,
                sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
            goto err;
        }
        mbufq_init(&q->txq[i].sendq);
        q->txq[i].gen = 1;
        q->txq[i].size = p->txq_size[i];
        snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d",
            0, irq_vec_idx, i);
        MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF);
    }

    q->txq[TXQ_ETH].port = pi;

    q->txq[TXQ_OFLD].qresume_task.name = "restart_offloadq";
    q->txq[TXQ_OFLD].qresume_task.func = restart_offloadq;
    q->txq[TXQ_OFLD].qresume_task.context = q;
    kthread_create(PRI_NONE, 0, NULL, cxgb_make_task,
        &q->txq[TXQ_OFLD].qresume_task, NULL, "cxgb_make_task");

    q->txq[TXQ_CTRL].qresume_task.name = "restart_ctrlq";
    q->txq[TXQ_CTRL].qresume_task.func = restart_ctrlq;
    q->txq[TXQ_CTRL].qresume_task.context = q;
    kthread_create(PRI_NONE, 0, NULL, cxgb_make_task,
        &q->txq[TXQ_CTRL].qresume_task, NULL, "cxgb_make_task");

    q->txq[TXQ_ETH].qreclaim_task.name = "sge_txq_reclaim_handler";
    q->txq[TXQ_ETH].qreclaim_task.func = sge_txq_reclaim_handler;
    q->txq[TXQ_ETH].qreclaim_task.context = &q->txq[TXQ_ETH];
    kthread_create(PRI_NONE, 0, NULL, cxgb_make_task,
        &q->txq[TXQ_ETH].qreclaim_task, NULL, "cxgb_make_task");

    q->txq[TXQ_OFLD].qreclaim_task.name = "sge_txq_reclaim_handler";
    q->txq[TXQ_OFLD].qreclaim_task.func = sge_txq_reclaim_handler;
    q->txq[TXQ_OFLD].qreclaim_task.context = &q->txq[TXQ_OFLD];
    kthread_create(PRI_NONE, 0, NULL, cxgb_make_task,
        &q->txq[TXQ_OFLD].qreclaim_task, NULL, "cxgb_make_task");

    q->fl[0].gen = q->fl[1].gen = 1;
    q->fl[0].size = p->fl_size;
    q->fl[1].size = p->jumbo_size;

    q->rspq.gen = 1;
    q->rspq.cidx = 0;
    q->rspq.size = p->rspq_size;

    q->txq[TXQ_ETH].stop_thres = nports *
        flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);

    q->fl[0].buf_size = MCLBYTES;
    q->fl[1].buf_size = MJUMPAGESIZE;

    q->lro.enabled = lro_default;

    mtx_lock(&sc->sge.reg_lock);
    ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
                   q->rspq.phys_addr, q->rspq.size,
                   q->fl[0].buf_size, 1, 0);
    if (ret) {
        printf("error %d from t3_sge_init_rspcntxt\n", ret);
        goto err_unlock;
    }

    for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
        ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
                      q->fl[i].phys_addr, q->fl[i].size,
                      q->fl[i].buf_size, p->cong_thres, 1,
                      0);
        if (ret) {
            printf("error %d from t3_sge_init_flcntxt for index i=%d\n",
                ret, i);
            goto err_unlock;
        }
    }

    ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
                 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
                 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
                 1, 0);
    if (ret) {
        printf("error %d from t3_sge_init_ecntxt\n", ret);
        goto err_unlock;
    }

    if (ntxq > 1) {
        ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
                     USE_GTS, SGE_CNTXT_OFLD, id,
                     q->txq[TXQ_OFLD].phys_addr,
                     q->txq[TXQ_OFLD].size, 0, 1, 0);
        if (ret) {
            printf("error %d from t3_sge_init_ecntxt\n", ret);
            goto err_unlock;
        }
    }

    if (ntxq > 2) {
        ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
                     SGE_CNTXT_CTRL, id,
                     q->txq[TXQ_CTRL].phys_addr,
                     q->txq[TXQ_CTRL].size,
                     q->txq[TXQ_CTRL].token, 1, 0);
        if (ret) {
            printf("error %d from t3_sge_init_ecntxt\n", ret);
            goto err_unlock;
        }
    }

    snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
        0, irq_vec_idx);
    MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);

    mtx_unlock(&sc->sge.reg_lock);
    t3_update_qset_coalesce(q, p);
    q->port = pi;

    refill_fl(sc, &q->fl[0], q->fl[0].size);
    refill_fl(sc, &q->fl[1], q->fl[1].size);
    refill_rspq(sc, &q->rspq, q->rspq.size - 1);

    t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
             V_NEWTIMER(q->rspq.holdoff_tmr));

    return (0);

err_unlock:
    mtx_unlock(&sc->sge.reg_lock);
err:
    t3_free_qset(sc, q);

    return (ret);
}
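
/*
 * Typical usage (sketch only; the parameter sources shown are
 * illustrative, not lifted from the attach code): the attach path
 * allocates one queue set per port, along the lines of
 *
 *     for_each_port(sc, i)
 *         t3_sge_alloc_qset(sc, i, 1, i, &sc->params.sge.qset[i],
 *             SGE_TXQ_PER_SET, &sc->port[i]);
 *
 * with failures unwound through t3_free_qset() as above.
 */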

void
t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
{
    struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
    struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
    struct ifnet *ifp = pi->ifp;

    DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *),
        cpl->iff);

    /*
     * XXX need to add VLAN support for 6.x
     */
#ifdef VLAN_SUPPORTED
    if (__predict_false(cpl->vlan_valid)) {
        m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
        m->m_flags |= M_VLANTAG;
    }
#endif

    m->m_pkthdr.rcvif = ifp;
    m_explode(m);
    /*
     * adjust after conversion to mbuf chain
     */
    m_adj(m, sizeof(*cpl) + ethpad);

    (*ifp->if_input)(ifp, m);
}

/**
 *  get_packet - return the next ingress packet buffer from a free list
 *  @adap: the adapter that received the packet
 *  @drop_thres: # of remaining buffers before we start dropping packets
 *  @qs: the qset that the SGE free list holding the packet belongs to
 *  @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
 *  @r: response descriptor
 *
 *  Get the next packet from a free list and complete setup of the
 *  mbuf.  If the packet is small we make a copy and recycle the
 *  original buffer, otherwise we use the original buffer itself.  If a
 *  positive drop threshold is supplied packets are dropped and their
 *  buffers recycled if (a) the number of remaining buffers is under the
 *  threshold and the packet is too big to copy, or (b) the packet should
 *  be copied but there is no memory for the copy.
 */
#ifdef DISABLE_MBUF_IOVEC

static int
get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
    struct t3_mbuf_hdr *mh, struct rsp_desc *r, struct mbuf *m)
{
    unsigned int len_cq = ntohl(r->len_cq);
    struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
    struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
    uint32_t len = G_RSPD_LEN(len_cq);
    uint32_t flags = ntohl(r->flags);
    uint8_t sopeop = G_RSPD_SOP_EOP(flags);
    int ret = 0;

    prefetch(sd->cl);

    fl->credits--;
    bus_dmamap_sync(fl->entry_tag, sd->map, 0, len, BUS_DMASYNC_POSTREAD);
    bus_dmamap_unload(fl->entry_tag, sd->map);

    m->m_len = len;
    m_cljset(m, sd->cl, fl->type);

    switch (sopeop) {
    case RSPQ_SOP_EOP:
        DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
        mh->mh_head = mh->mh_tail = m;
        m->m_pkthdr.len = len;
        m->m_flags |= M_PKTHDR;
        ret = 1;
        break;
    case RSPQ_NSOP_NEOP:
        DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
        m->m_flags &= ~M_PKTHDR;
        if (mh->mh_tail == NULL) {
            if (cxgb_debug)
                printf("discarding intermediate descriptor entry\n");
            m_freem(m);
            break;
        }
        mh->mh_tail->m_next = m;
        mh->mh_tail = m;
        mh->mh_head->m_pkthdr.len += len;
        ret = 0;
        break;
    case RSPQ_SOP:
        DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
        m->m_pkthdr.len = len;
        mh->mh_head = mh->mh_tail = m;
        m->m_flags |= M_PKTHDR;
        ret = 0;
        break;
    case RSPQ_EOP:
        DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
        m->m_flags &= ~M_PKTHDR;
        mh->mh_head->m_pkthdr.len += len;
        mh->mh_tail->m_next = m;
        mh->mh_tail = m;
        ret = 1;
        break;
    }
    if (++fl->cidx == fl->size)
        fl->cidx = 0;

    return (ret);
}

#else
static int
get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
    struct mbuf *m, struct rsp_desc *r)
{
    unsigned int len_cq = ntohl(r->len_cq);
    struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
    struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
    uint32_t len = G_RSPD_LEN(len_cq);
    uint32_t flags = ntohl(r->flags);
    uint8_t sopeop = G_RSPD_SOP_EOP(flags);
    void *cl;
    int ret = 0;

    prefetch(sd->cl);

    fl->credits--;
    bus_dmamap_sync(fl->entry_tag, sd->map, 0, len, BUS_DMASYNC_POSTREAD);

    if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
        cl = mtod(m, void *);
        memcpy(cl, sd->cl, len);
        recycle_rx_buf(adap, fl, fl->cidx);
    } else {
        cl = sd->cl;
        bus_dmamap_unload(fl->entry_tag, sd->map);
    }
    switch (sopeop) {
    case RSPQ_SOP_EOP:
        DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
        m->m_len = m->m_pkthdr.len = len;
        if (cl == sd->cl)
            m_cljset(m, cl, fl->type);
        ret = 1;
        goto done;
        break;
    case RSPQ_NSOP_NEOP:
        DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
        ret = 0;
        break;
    case RSPQ_SOP:
        DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
        m_iovinit(m);
        ret = 0;
        break;
    case RSPQ_EOP:
        DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
        ret = 1;
        break;
    }
    m_iovappend(m, cl, fl->buf_size, len, 0);

done:
    if (++fl->cidx == fl->size)
        fl->cidx = 0;

    return (ret);
}
#endif
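
/*
 * Summary of the SOP/EOP handling in both get_packet() variants above:
 * RSPQ_SOP_EOP delivers a complete packet (return 1), RSPQ_SOP starts a
 * new chain, RSPQ_NSOP_NEOP appends to the chain in progress, and
 * RSPQ_EOP completes it (return 1).  The return value tells
 * process_responses() whether a full packet is ready to be passed up.
 */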
/**
 *  handle_rsp_cntrl_info - handles control information in a response
 *  @qs: the queue set corresponding to the response
 *  @flags: the response control flags
 *
 *  Handles the control information of an SGE response, such as GTS
 *  indications and completion credits for the queue set's Tx queues.
 *  HW coalesces credits; we don't do any extra SW coalescing.
 */
static __inline void
handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
{
    unsigned int credits;

#if USE_GTS
    if (flags & F_RSPD_TXQ0_GTS)
        clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
#endif
    credits = G_RSPD_TXQ0_CR(flags);
    if (credits) {
        qs->txq[TXQ_ETH].processed += credits;
        if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC)
            workqueue_enqueue(qs->port->timer_reclaim_task.wq,
                             &qs->port->timer_reclaim_task.w, NULL);
    }

    credits = G_RSPD_TXQ2_CR(flags);
    if (credits)
        qs->txq[TXQ_CTRL].processed += credits;

#if USE_GTS
    if (flags & F_RSPD_TXQ1_GTS)
        clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
#endif
    credits = G_RSPD_TXQ1_CR(flags);
    if (credits)
        qs->txq[TXQ_OFLD].processed += credits;
}

static void
check_ring_db(adapter_t *adap, struct sge_qset *qs,
    unsigned int sleeping)
{
    /*
     * Doorbell kicks are only needed when GTS is in use.  USE_GTS is 0
     * in this driver, so the egress doorbell is rung unconditionally in
     * check_ring_tx_db() and this function is intentionally a no-op.
     */
}

/**
 *  process_responses - process responses from an SGE response queue
 *  @adap: the adapter
 *  @qs: the queue set to which the response queue belongs
 *  @budget: how many responses can be processed in this round
 *
 *  Process responses from an SGE response queue up to the supplied budget.
 *  Responses include received packets as well as credits and other events
 *  for the queues that belong to the response queue's queue set.
 *  A negative budget is effectively unlimited.
 *
 *  Additionally choose the interrupt holdoff time for the next interrupt
 *  on this queue.  If the system is under memory shortage use a fairly
 *  long delay to help recovery.
 */
static int
process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
{
    struct sge_rspq *rspq = &qs->rspq;
    struct rsp_desc *r = &rspq->desc[rspq->cidx];
    int budget_left = budget;
    unsigned int sleeping = 0;
    int lro = qs->lro.enabled;
#ifdef DEBUG
    static int last_holdoff = 0;
    if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
        printf("next_holdoff=%d\n", rspq->holdoff_tmr);
        last_holdoff = rspq->holdoff_tmr;
    }
#endif
    rspq->next_holdoff = rspq->holdoff_tmr;

    while (__predict_true(budget_left && is_new_response(r, rspq))) {
        int eth, eop = 0, ethpad = 0;
        uint32_t flags = ntohl(r->flags);
        uint32_t rss_csum = *(const uint32_t *)r;
        uint32_t rss_hash = r->rss_hdr.rss_hash_val;

        eth = (r->rss_hdr.opcode == CPL_RX_PKT);

        if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
            /* XXX */
        } else if (flags & F_RSPD_IMM_DATA_VALID) {
#ifdef DISABLE_MBUF_IOVEC
            if (cxgb_debug)
                printf("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
                    r->rss_hdr.opcode, rspq->cidx);

            if (get_imm_packet(adap, r, &rspq->rspq_mh) == 0) {
                rspq->next_holdoff = NOMEM_INTR_DELAY;
                budget_left--;
                break;
            } else {
                eop = 1;
            }
#else
            struct mbuf *m = NULL;

            if (rspq->rspq_mbuf == NULL)
                rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA);
            else
                m = m_gethdr(M_DONTWAIT, MT_DATA);

            /*
             * XXX revisit me
             */
            if (rspq->rspq_mbuf == NULL && m == NULL) {
                rspq->next_holdoff = NOMEM_INTR_DELAY;
                budget_left--;
                break;
            }
            if (get_imm_packet(adap, r, rspq->rspq_mbuf, m, flags))
                goto skip;
            eop = 1;
#endif
            rspq->imm_data++;
        } else if (r->len_cq) {
            int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;

#ifdef DISABLE_MBUF_IOVEC
            struct mbuf *m;
            m = m_gethdr(M_NOWAIT, MT_DATA);

            if (m == NULL) {
                log(LOG_WARNING, "failed to get mbuf for packet\n");
                break;
            }

            eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r, m);
#else
            if (rspq->rspq_mbuf == NULL)
                rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA);
            if (rspq->rspq_mbuf == NULL) {
                log(LOG_WARNING, "failed to get mbuf for packet\n");
                break;
            }
            eop = get_packet(adap, drop_thresh, qs, rspq->rspq_mbuf, r);
#endif
            ethpad = 2;
        } else {
            DPRINTF("pure response\n");
            rspq->pure_rsps++;
        }

        if (flags & RSPD_CTRL_MASK) {
            sleeping |= flags & RSPD_GTS_MASK;
            handle_rsp_cntrl_info(qs, flags);
        }
#ifndef DISABLE_MBUF_IOVEC
    skip:
#endif
        r++;
        if (__predict_false(++rspq->cidx == rspq->size)) {
            rspq->cidx = 0;
            rspq->gen ^= 1;
            r = rspq->desc;
        }

        prefetch(r);
        if (++rspq->credits >= (rspq->size / 4)) {
            refill_rspq(adap, rspq, rspq->credits);
            rspq->credits = 0;
        }

        if (eop) {
            prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *));
            prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *) + L1_CACHE_BYTES);

            if (eth) {
                t3_rx_eth_lro(adap, rspq, rspq->rspq_mh.mh_head, ethpad,
                    rss_hash, rss_csum, lro);

                rspq->rspq_mh.mh_head = NULL;
            } else {
                rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
                /*
                 * XXX size mismatch
                 */
                m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
            }
            __refill_fl(adap, &qs->fl[0]);
            __refill_fl(adap, &qs->fl[1]);
        }
        --budget_left;
    }

    t3_lro_flush(adap, qs, &qs->lro);

    if (sleeping)
        check_ring_db(adap, qs, sleeping);

    smp_mb();  /* commit Tx queue processed updates */
    if (__predict_false(qs->txq_stopped != 0))
        restart_tx(qs);

    budget -= budget_left;
    return (budget);
}

/*
 * A helper function that processes responses and issues GTS.
 */
static __inline int
process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
{
    int work;
    static int last_holdoff = 0;

    work = process_responses(adap, rspq_to_qset(rq), -1);

    if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
        printf("next_holdoff=%d\n", rq->next_holdoff);
        last_holdoff = rq->next_holdoff;
    }
    if (work)
        t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
            V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
    return work;
}
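
/*
 * The single A_SG_GTS write above does double duty: V_NEWINDEX(rq->cidx)
 * returns the processed response-queue entries to the hardware and
 * V_NEWTIMER(rq->next_holdoff) programs the next interrupt holdoff,
 * which process_responses() may have raised to NOMEM_INTR_DELAY when
 * mbuf allocation failed.
 */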

/*
 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
 * Handles data events from SGE response queues as well as error and other
 * async events as they all use the same interrupt pin.  We use one SGE
 * response queue per port in this mode and protect all response queues with
 * queue 0's lock.
 */
int
t3b_intr(void *data)
{
    uint32_t i, map;
    adapter_t *adap = data;
    struct sge_rspq *q0 = &adap->sge.qs[0].rspq;

    t3_write_reg(adap, A_PL_CLI, 0);
    map = t3_read_reg(adap, A_SG_DATA_INTR);

    if (!map)
        return (FALSE);

    if (__predict_false(map & F_ERRINTR))
        workqueue_enqueue(adap->slow_intr_task.wq,
            &adap->slow_intr_task.w, NULL);

    mtx_lock(&q0->lock);
    for_each_port(adap, i)
        if (map & (1 << i))
            process_responses_gts(adap, &adap->sge.qs[i].rspq);
    mtx_unlock(&q0->lock);

    return (TRUE);
}

/*
 * The MSI interrupt handler.  This needs to handle data events from SGE
 * response queues as well as error and other async events as they all use
 * the same MSI vector.  We use one SGE response queue per port in this mode
 * and protect all response queues with queue 0's lock.
 */
int
t3_intr_msi(void *data)
{
    adapter_t *adap = data;
    struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
    int i, new_packets = 0;

    mtx_lock(&q0->lock);

    for_each_port(adap, i)
        if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
            new_packets = 1;
    mtx_unlock(&q0->lock);
    if (new_packets == 0)
        workqueue_enqueue(adap->slow_intr_task.wq,
            &adap->slow_intr_task.w, NULL);

    return (TRUE);
}

int
t3_intr_msix(void *data)
{
    struct sge_qset *qs = data;
    adapter_t *adap = qs->port->adapter;
    struct sge_rspq *rspq = &qs->rspq;

    mtx_lock(&rspq->lock);
    if (process_responses_gts(adap, rspq) == 0)
        rspq->unhandled_irqs++;
    mtx_unlock(&rspq->lock);

    return (TRUE);
}

/**
 *  t3_get_desc - dump an SGE descriptor for debugging purposes
 *  @qs: the queue set
 *  @qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
 *  @idx: the descriptor index in the queue
 *  @data: where to dump the descriptor contents
 *
 *  Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 *  size of the descriptor.
 */
int
t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
        unsigned char *data)
{
    if (qnum >= 6)
        return (EINVAL);

    if (qnum < 3) {
        if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
            return (EINVAL);
        memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
        return sizeof(struct tx_desc);
    }

    if (qnum == 3) {
        if (!qs->rspq.desc || idx >= qs->rspq.size)
            return (EINVAL);
        memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
        return sizeof(struct rsp_desc);
    }

    qnum -= 4;
    if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
        return (EINVAL);
    memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
    return sizeof(struct rx_desc);
}
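
/*
 * Example (sketch, not used by the driver): dumping the response-queue
 * descriptor at the current consumer index for debugging.  Queue number
 * 3 selects the response queue, per the numbering described above.
 */
static __inline int
dump_cur_rspq_desc_sketch(const struct sge_qset *qs, unsigned char *buf)
{
    /* buf must have room for sizeof(struct rsp_desc) bytes */
    return t3_get_desc(qs, 3, qs->rspq.cidx, buf);
}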