1/************************************************************************** 2 3Copyright (c) 2007, Chelsio Inc. 4All rights reserved. 5 6Redistribution and use in source and binary forms, with or without 7modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26POSSIBILITY OF SUCH DAMAGE. 
27 28***************************************************************************/ 29 30#include <sys/cdefs.h> 31__KERNEL_RCSID(0, "$NetBSD: cxgb_sge.c,v 1.1 2010/03/21 21:11:13 jklos Exp $"); 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/kernel.h> 36#include <sys/conf.h> 37#include <sys/bus.h> 38#include <sys/queue.h> 39#include <sys/sysctl.h> 40 41#include <sys/proc.h> 42#include <sys/sched.h> 43#include <sys/systm.h> 44 45#include <netinet/in_systm.h> 46#include <netinet/in.h> 47#include <netinet/ip.h> 48#include <netinet/tcp.h> 49 50#include <dev/pci/pcireg.h> 51#include <dev/pci/pcivar.h> 52 53#ifdef CONFIG_DEFINED 54#include <cxgb_include.h> 55#else 56#include <dev/pci/cxgb/cxgb_include.h> 57#endif 58 59uint32_t collapse_free = 0; 60uint32_t mb_free_vec_free = 0; 61int txq_fills = 0; 62int collapse_mbufs = 0; 63static int bogus_imm = 0; 64#ifndef DISABLE_MBUF_IOVEC 65static int recycle_enable = 1; 66#endif 67 68#define USE_GTS 0 69 70#define SGE_RX_SM_BUF_SIZE 1536 71#define SGE_RX_DROP_THRES 16 72#define SGE_RX_COPY_THRES 128 73 74/* 75 * Period of the Tx buffer reclaim timer. This timer does not need to run 76 * frequently as Tx buffers are usually reclaimed by new Tx packets. 
77 */ 78#define TX_RECLAIM_PERIOD (hz >> 1) 79 80/* 81 * work request size in bytes 82 */ 83#define WR_LEN (WR_FLITS * 8) 84 85/* 86 * Values for sge_txq.flags 87 */ 88enum { 89 TXQ_RUNNING = 1 << 0, /* fetch engine is running */ 90 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */ 91}; 92 93struct tx_desc { 94 uint64_t flit[TX_DESC_FLITS]; 95} __packed; 96 97struct rx_desc { 98 uint32_t addr_lo; 99 uint32_t len_gen; 100 uint32_t gen2; 101 uint32_t addr_hi; 102} __packed; 103 104struct rsp_desc { /* response queue descriptor */ 105 struct rss_header rss_hdr; 106 uint32_t flags; 107 uint32_t len_cq; 108 uint8_t imm_data[47]; 109 uint8_t intr_gen; 110} __packed; 111 112#define RX_SW_DESC_MAP_CREATED (1 << 0) 113#define TX_SW_DESC_MAP_CREATED (1 << 1) 114#define RX_SW_DESC_INUSE (1 << 3) 115#define TX_SW_DESC_MAPPED (1 << 4) 116 117#define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0) 118#define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP) 119#define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP) 120#define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP) 121 122struct tx_sw_desc { /* SW state per Tx descriptor */ 123 struct mbuf *m; 124 bus_dma_segment_t segs[1]; 125 bus_dmamap_t map; 126 int flags; 127}; 128 129struct rx_sw_desc { /* SW state per Rx descriptor */ 130 void *cl; 131 bus_dmamap_t map; 132 int flags; 133}; 134 135struct txq_state { 136 unsigned int compl; 137 unsigned int gen; 138 unsigned int pidx; 139}; 140 141/* 142 * Maps a number of flits to the number of Tx descriptors that can hold them. 143 * The formula is 144 * 145 * desc = 1 + (flits - 2) / (WR_FLITS - 1). 146 * 147 * HW allows up to 4 descriptors to be combined into a WR. 
148 */ 149static uint8_t flit_desc_map[] = { 150 0, 151#if SGE_NUM_GENBITS == 1 152 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 153 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 154 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 155 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 156#elif SGE_NUM_GENBITS == 2 157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 158 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 159 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 160 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 161#else 162# error "SGE_NUM_GENBITS must be 1 or 2" 163#endif 164}; 165 166 167static int lro_default = 0; 168int cxgb_debug = 0; 169 170static void t3_free_qset(adapter_t *sc, struct sge_qset *q); 171static void sge_timer_cb(void *arg); 172static void sge_timer_reclaim(struct work *wk, void *arg); 173static void sge_txq_reclaim_handler(struct work *wk, void *arg); 174static int free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec); 175 176/** 177 * reclaim_completed_tx - reclaims completed Tx descriptors 178 * @adapter: the adapter 179 * @q: the Tx queue to reclaim completed descriptors from 180 * 181 * Reclaims Tx descriptors that the SGE has indicated it has processed, 182 * and frees the associated buffers if possible. Called with the Tx 183 * queue's lock held. 184 */ 185static __inline int 186reclaim_completed_tx(struct sge_txq *q, int nbufs, struct mbuf **mvec) 187{ 188 int reclaimed, reclaim = desc_reclaimable(q); 189 int n = 0; 190 191 mtx_assert(&q->lock, MA_OWNED); 192 if (reclaim > 0) { 193 n = free_tx_desc(q, min(reclaim, nbufs), mvec); 194 reclaimed = min(reclaim, nbufs); 195 q->cleaned += reclaimed; 196 q->in_use -= reclaimed; 197 } 198 return (n); 199} 200 201/** 202 * should_restart_tx - are there enough resources to restart a Tx queue? 203 * @q: the Tx queue 204 * 205 * Checks if there are enough descriptors to restart a suspended Tx queue. 
206 */ 207static __inline int 208should_restart_tx(const struct sge_txq *q) 209{ 210 unsigned int r = q->processed - q->cleaned; 211 212 return q->in_use - r < (q->size >> 1); 213} 214 215/** 216 * t3_sge_init - initialize SGE 217 * @adap: the adapter 218 * @p: the SGE parameters 219 * 220 * Performs SGE initialization needed every time after a chip reset. 221 * We do not initialize any of the queue sets here, instead the driver 222 * top-level must request those individually. We also do not enable DMA 223 * here, that should be done after the queues have been set up. 224 */ 225void 226t3_sge_init(adapter_t *adap, struct sge_params *p) 227{ 228 u_int ctrl, ups; 229 230 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ 231 232 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | 233 F_CQCRDTCTRL | 234 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | 235 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING; 236#if SGE_NUM_GENBITS == 1 237 ctrl |= F_EGRGENCTRL; 238#endif 239 if (adap->params.rev > 0) { 240 if (!(adap->flags & (USING_MSIX | USING_MSI))) 241 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; 242 ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL; 243 } 244 t3_write_reg(adap, A_SG_CONTROL, ctrl); 245 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | 246 V_LORCQDRBTHRSH(512)); 247 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); 248 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | 249 V_TIMEOUT(200 * core_ticks_per_usec(adap))); 250 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000); 251 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); 252 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); 253 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); 254 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); 255 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); 256} 257 258 259/** 260 * sgl_len - calculates the size of an SGL of the given capacity 261 * @n: the number of SGL entries 262 * 263 * Calculates the number of flits 
needed for a scatter/gather list that 264 * can hold the given number of entries. 265 */ 266static __inline unsigned int 267sgl_len(unsigned int n) 268{ 269 return ((3 * n) / 2 + (n & 1)); 270} 271 272/** 273 * get_imm_packet - return the next ingress packet buffer from a response 274 * @resp: the response descriptor containing the packet data 275 * 276 * Return a packet containing the immediate data of the given response. 277 */ 278#ifdef DISABLE_MBUF_IOVEC 279static __inline int 280get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct t3_mbuf_hdr *mh) 281{ 282 struct mbuf *m; 283 int len; 284 uint32_t flags = ntohl(resp->flags); 285 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 286 287 /* 288 * would be a firmware bug 289 */ 290 if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP) 291 return (0); 292 293 m = m_gethdr(M_NOWAIT, MT_DATA); 294 len = G_RSPD_LEN(ntohl(resp->len_cq)); 295 296 if (m) { 297 MH_ALIGN(m, IMMED_PKT_SIZE); 298 memcpy(m->m_data, resp->imm_data, IMMED_PKT_SIZE); 299 m->m_len = len; 300 301 switch (sopeop) { 302 case RSPQ_SOP_EOP: 303 mh->mh_head = mh->mh_tail = m; 304 m->m_pkthdr.len = len; 305 m->m_flags |= M_PKTHDR; 306 break; 307 case RSPQ_EOP: 308 m->m_flags &= ~M_PKTHDR; 309 mh->mh_head->m_pkthdr.len += len; 310 mh->mh_tail->m_next = m; 311 mh->mh_tail = m; 312 break; 313 } 314 } 315 return (m != NULL); 316} 317 318#else 319static int 320get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl, uint32_t flags) 321{ 322 int len, error; 323 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 324 325 /* 326 * would be a firmware bug 327 */ 328 len = G_RSPD_LEN(ntohl(resp->len_cq)); 329 if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP) { 330 if (cxgb_debug) 331 device_printf(sc->dev, "unexpected value sopeop=%d flags=0x%x len=%din get_imm_packet\n", sopeop, flags, len); 332 bogus_imm++; 333 return (EINVAL); 334 } 335 error = 0; 336 switch (sopeop) { 337 case RSPQ_SOP_EOP: 338 m->m_len = m->m_pkthdr.len = len; 339 
memcpy(mtod(m, uint8_t *), resp->imm_data, len); 340 break; 341 case RSPQ_EOP: 342 memcpy(cl, resp->imm_data, len); 343 m_iovappend(m, cl, MSIZE, len, 0); 344 break; 345 default: 346 bogus_imm++; 347 error = EINVAL; 348 } 349 350 return (error); 351} 352#endif 353 354static __inline u_int 355flits_to_desc(u_int n) 356{ 357 return (flit_desc_map[n]); 358} 359 360void 361t3_sge_err_intr_handler(adapter_t *adapter) 362{ 363 unsigned int v, status; 364 365 366 status = t3_read_reg(adapter, A_SG_INT_CAUSE); 367 368 if (status & F_RSPQCREDITOVERFOW) 369 CH_ALERT(adapter, "SGE response queue credit overflow\n"); 370 371 if (status & F_RSPQDISABLED) { 372 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); 373 374 CH_ALERT(adapter, 375 "packet delivered to disabled response queue (0x%x)\n", 376 (v >> S_RSPQ0DISABLED) & 0xff); 377 } 378 379 t3_write_reg(adapter, A_SG_INT_CAUSE, status); 380 if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED)) 381 t3_fatal_err(adapter); 382} 383 384void 385t3_sge_prep(adapter_t *adap, struct sge_params *p) 386{ 387 int i; 388 389 /* XXX Does ETHER_ALIGN need to be accounted for here? */ 390 p->max_pkt_size = MJUM16BYTES - sizeof(struct cpl_rx_data); 391 392 for (i = 0; i < SGE_QSETS; ++i) { 393 struct qset_params *q = p->qset + i; 394 395 q->polling = adap->params.rev > 0; 396 397 if (adap->params.nports > 2) 398 q->coalesce_nsecs = 50000; 399 else 400 q->coalesce_nsecs = 5000; 401 402 q->rspq_size = RSPQ_Q_SIZE; 403 q->fl_size = FL_Q_SIZE; 404 q->jumbo_size = JUMBO_Q_SIZE; 405 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; 406 q->txq_size[TXQ_OFLD] = 1024; 407 q->txq_size[TXQ_CTRL] = 256; 408 q->cong_thres = 0; 409 } 410} 411 412int 413t3_sge_alloc(adapter_t *sc) 414{ 415 /* The parent tag. */ 416 sc->parent_dmat = sc->pa.pa_dmat; 417 418 /* 419 * DMA tag for normal sized RX frames 420 */ 421 sc->rx_dmat = sc->pa.pa_dmat; 422 423 /* 424 * DMA tag for jumbo sized RX frames. 
425 */ 426 sc->rx_jumbo_dmat = sc->pa.pa_dmat; 427 428 /* 429 * DMA tag for TX frames. 430 */ 431 sc->tx_dmat = sc->pa.pa_dmat; 432 433 return (0); 434} 435 436int 437t3_sge_free(struct adapter * sc) 438{ 439 return (0); 440} 441 442void 443t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) 444{ 445 446 qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U); 447 qs->rspq.polling = 0 /* p->polling */; 448} 449 450/** 451 * refill_fl - refill an SGE free-buffer list 452 * @sc: the controller softc 453 * @q: the free-list to refill 454 * @n: the number of new buffers to allocate 455 * 456 * (Re)populate an SGE free-buffer list with up to @n new packet buffers. 457 * The caller must assure that @n does not exceed the queue's capacity. 458 */ 459static void 460refill_fl(adapter_t *sc, struct sge_fl *q, int n) 461{ 462 struct rx_sw_desc *sd = &q->sdesc[q->pidx]; 463 struct rx_desc *d = &q->desc[q->pidx]; 464 void *cl; 465 int err; 466 467 while (n--) { 468 /* 469 * We only allocate a cluster, mbuf allocation happens after rx 470 */ 471 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) 472 { 473 err = bus_dmamap_create(sc->pa.pa_dmat, 474 q->buf_size, 1, q->buf_size, 0, 475 BUS_DMA_ALLOCNOW, &sd->map); 476 if (err != 0) 477 { 478 log(LOG_WARNING, "failure in refill_fl\n"); 479 return; 480 } 481 sd->flags |= RX_SW_DESC_MAP_CREATED; 482 } 483 cl = malloc(q->buf_size, M_DEVBUF, M_NOWAIT); 484 if (cl == NULL) 485 { 486 log(LOG_WARNING, "Failed to allocate cluster\n"); 487 break; 488 } 489 err = bus_dmamap_load(sc->pa.pa_dmat, sd->map, cl, q->buf_size, NULL, BUS_DMA_NOWAIT); 490 if (err) 491 { 492 log(LOG_WARNING, "failure in refill_fl\n"); 493 free(cl, M_DEVBUF); 494 return; 495 } 496 497 sd->flags |= RX_SW_DESC_INUSE; 498 sd->cl = cl; 499 d->addr_lo = htobe32(sd->map->dm_segs[0].ds_addr & 0xffffffff); 500 d->addr_hi = htobe32(((uint64_t)sd->map->dm_segs[0].ds_addr>>32) & 0xffffffff); 501 d->len_gen = htobe32(V_FLD_GEN1(q->gen)); 502 d->gen2 = 
htobe32(V_FLD_GEN2(q->gen)); 503 504 d++; 505 sd++; 506 507 if (++q->pidx == q->size) { 508 q->pidx = 0; 509 q->gen ^= 1; 510 sd = q->sdesc; 511 d = q->desc; 512 } 513 q->credits++; 514 } 515 516 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 517} 518 519 520/** 521 * free_rx_bufs - free the Rx buffers on an SGE free list 522 * @sc: the controle softc 523 * @q: the SGE free list to clean up 524 * 525 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from 526 * this queue should be stopped before calling this function. 527 */ 528static void 529free_rx_bufs(adapter_t *sc, struct sge_fl *q) 530{ 531 u_int cidx = q->cidx; 532 533 while (q->credits--) { 534 struct rx_sw_desc *d = &q->sdesc[cidx]; 535 536 if (d->flags & RX_SW_DESC_INUSE) { 537 bus_dmamap_unload(q->entry_tag, d->map); 538 bus_dmamap_destroy(q->entry_tag, d->map); 539 d->map = NULL; 540 free(d->cl, M_DEVBUF); 541 d->cl = NULL; 542 } 543 d->cl = NULL; 544 if (++cidx == q->size) 545 cidx = 0; 546 } 547} 548 549static __inline void 550__refill_fl(adapter_t *adap, struct sge_fl *fl) 551{ 552 refill_fl(adap, fl, min(16U, fl->size - fl->credits)); 553} 554 555#ifndef DISABLE_MBUF_IOVEC 556/** 557 * recycle_rx_buf - recycle a receive buffer 558 * @adapter: the adapter 559 * @q: the SGE free list 560 * @idx: index of buffer to recycle 561 * 562 * Recycles the specified buffer on the given free list by adding it at 563 * the next available slot on the list. 
564 */ 565static void 566recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx) 567{ 568 struct rx_desc *from = &q->desc[idx]; 569 struct rx_desc *to = &q->desc[q->pidx]; 570 571 q->sdesc[q->pidx] = q->sdesc[idx]; 572 to->addr_lo = from->addr_lo; // already big endian 573 to->addr_hi = from->addr_hi; // likewise 574 wmb(); 575 to->len_gen = htobe32(V_FLD_GEN1(q->gen)); 576 to->gen2 = htobe32(V_FLD_GEN2(q->gen)); 577 q->credits++; 578 579 if (++q->pidx == q->size) { 580 q->pidx = 0; 581 q->gen ^= 1; 582 } 583 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 584} 585#endif 586 587static int 588alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, 589 bus_addr_t *phys, 590 void *desc, void *sdesc, bus_dma_tag_t *tag, 591 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag) 592{ 593 size_t len = nelem * elem_size; 594 void *s = NULL; 595 void *p = NULL; 596 int err; 597 bus_dma_segment_t phys_seg; 598 599 int nsegs; 600 601 *tag = sc->pa.pa_dmat; 602 603 /* allocate wired physical memory for DMA descriptor array */ 604 err = bus_dmamem_alloc(*tag, len, PAGE_SIZE, 0, &phys_seg, 1, 605 &nsegs, BUS_DMA_NOWAIT); 606 if (err != 0) 607 { 608 device_printf(sc->dev, "Cannot allocate descriptor memory\n"); 609 return (ENOMEM); 610 } 611 *phys = phys_seg.ds_addr; 612 613 /* map physical address to kernel virtual address */ 614 err = bus_dmamem_map(*tag, &phys_seg, 1, len, &p, 615 BUS_DMA_NOWAIT|BUS_DMA_COHERENT); 616 if (err != 0) 617 { 618 device_printf(sc->dev, "Cannot map descriptor memory\n"); 619 return (ENOMEM); 620 } 621 622 memset(p, 0, len); 623 *(void **)desc = p; 624 625 if (sw_size) 626 { 627 len = nelem * sw_size; 628 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO); 629 *(void **)sdesc = s; 630 } 631 632 if (parent_entry_tag == NULL) 633 return (0); 634 *entry_tag = sc->pa.pa_dmat; 635 636 return (0); 637} 638 639static void 640sge_slow_intr_handler(struct work *wk, void *arg) 641{ 642 adapter_t *sc = 
arg; 643 644 t3_slow_intr_handler(sc); 645} 646 647/** 648 * sge_timer_cb - perform periodic maintenance of an SGE qset 649 * @data: the SGE queue set to maintain 650 * 651 * Runs periodically from a timer to perform maintenance of an SGE queue 652 * set. It performs two tasks: 653 * 654 * a) Cleans up any completed Tx descriptors that may still be pending. 655 * Normal descriptor cleanup happens when new packets are added to a Tx 656 * queue so this timer is relatively infrequent and does any cleanup only 657 * if the Tx queue has not seen any new packets in a while. We make a 658 * best effort attempt to reclaim descriptors, in that we don't wait 659 * around if we cannot get a queue's lock (which most likely is because 660 * someone else is queueing new packets and so will also handle the clean 661 * up). Since control queues use immediate data exclusively we don't 662 * bother cleaning them up here. 663 * 664 * b) Replenishes Rx queues that have run out due to memory shortage. 665 * Normally new Rx buffers are added when existing ones are consumed but 666 * when out of memory a queue can become empty. We try to add only a few 667 * buffers here, the queue will be replenished fully as these new buffers 668 * are used up if memory shortage has subsided. 669 * 670 * c) Return coalesced response queue credits in case a response queue is 671 * starved. 672 * 673 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell 674 * fifo overflows and the FW doesn't implement any recovery scheme yet. 
675 */ 676 677static void 678sge_timer_cb(void *arg) 679{ 680 adapter_t *sc = arg; 681 struct port_info *p; 682 struct sge_qset *qs; 683 struct sge_txq *txq; 684 int i, j; 685 int reclaim_eth, reclaim_ofl, refill_rx; 686 687 for (i = 0; i < sc->params.nports; i++) 688 for (j = 0; j < sc->port[i].nqsets; j++) { 689 qs = &sc->sge.qs[i + j]; 690 txq = &qs->txq[0]; 691 reclaim_eth = txq[TXQ_ETH].processed - txq[TXQ_ETH].cleaned; 692 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; 693 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 694 (qs->fl[1].credits < qs->fl[1].size)); 695 if (reclaim_eth || reclaim_ofl || refill_rx) { 696 p = &sc->port[i]; 697 workqueue_enqueue(p->timer_reclaim_task.wq, &p->timer_reclaim_task.w, NULL); 698 break; 699 } 700 } 701 if (sc->params.nports > 2) { 702 int k; 703 704 for_each_port(sc, k) { 705 struct port_info *pi = &sc->port[k]; 706 707 t3_write_reg(sc, A_SG_KDOORBELL, 708 F_SELEGRCNTX | 709 (FW_TUNNEL_SGEEC_START + pi->first_qset)); 710 } 711 } 712 if (sc->open_device_map != 0) 713 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 714} 715 716/* 717 * This is meant to be a catch-all function to keep sge state private 718 * to sge.c 719 * 720 */ 721int 722t3_sge_init_adapter(adapter_t *sc) 723{ 724 callout_init(&sc->sge_timer_ch, 0); 725 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 726 sc->slow_intr_task.name = "sge_slow_intr"; 727 sc->slow_intr_task.func = sge_slow_intr_handler; 728 sc->slow_intr_task.context = sc; 729 kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &sc->slow_intr_task, NULL, "cxgb_make_task"); 730 return (0); 731} 732 733int 734t3_sge_init_port(struct port_info *p) 735{ 736 p->timer_reclaim_task.name = "sge_timer_reclaim"; 737 p->timer_reclaim_task.func = sge_timer_reclaim; 738 p->timer_reclaim_task.context = p; 739 kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &p->timer_reclaim_task, NULL, "cxgb_make_task"); 740 741 return (0); 742} 743 744void 
745t3_sge_deinit_sw(adapter_t *sc) 746{ 747 callout_drain(&sc->sge_timer_ch); 748} 749 750/** 751 * refill_rspq - replenish an SGE response queue 752 * @adapter: the adapter 753 * @q: the response queue to replenish 754 * @credits: how many new responses to make available 755 * 756 * Replenishes a response queue by making the supplied number of responses 757 * available to HW. 758 */ 759static __inline void 760refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) 761{ 762 763 /* mbufs are allocated on demand when a rspq entry is processed. */ 764 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, 765 V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); 766} 767 768static __inline void 769sge_txq_reclaim_(struct sge_txq *txq) 770{ 771 int reclaimable, i, n; 772 struct mbuf *m_vec[TX_CLEAN_MAX_DESC]; 773 struct port_info *p; 774 775 p = txq->port; 776reclaim_more: 777 n = 0; 778 reclaimable = desc_reclaimable(txq); 779 if (reclaimable > 0 && mtx_trylock(&txq->lock)) { 780 n = reclaim_completed_tx(txq, TX_CLEAN_MAX_DESC, m_vec); 781 mtx_unlock(&txq->lock); 782 } 783 if (n == 0) 784 return; 785 786 for (i = 0; i < n; i++) { 787 m_freem_vec(m_vec[i]); 788 } 789 if (p && p->ifp->if_drv_flags & IFF_DRV_OACTIVE && 790 txq->size - txq->in_use >= TX_START_MAX_DESC) { 791 txq_fills++; 792 p->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 793 workqueue_enqueue(p->start_task.wq, &p->start_task.w, NULL); 794 } 795 796 if (n) 797 goto reclaim_more; 798} 799 800static void 801sge_txq_reclaim_handler(struct work *wk, void *arg) 802{ 803 struct sge_txq *q = arg; 804 805 sge_txq_reclaim_(q); 806} 807 808static void 809sge_timer_reclaim(struct work *wk, void *arg) 810{ 811 struct port_info *p = arg; 812 int i, nqsets = p->nqsets; 813 adapter_t *sc = p->adapter; 814 struct sge_qset *qs; 815 struct sge_txq *txq; 816 struct mtx *lock; 817 818 for (i = 0; i < nqsets; i++) { 819 qs = &sc->sge.qs[i]; 820 txq = &qs->txq[TXQ_ETH]; 821 sge_txq_reclaim_(txq); 822 823 txq = &qs->txq[TXQ_OFLD]; 824 
sge_txq_reclaim_(txq); 825 826 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 827 &sc->sge.qs[0].rspq.lock; 828 829 if (mtx_trylock(lock)) { 830 /* XXX currently assume that we are *NOT* polling */ 831 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); 832 833 if (qs->fl[0].credits < qs->fl[0].size - 16) 834 __refill_fl(sc, &qs->fl[0]); 835 if (qs->fl[1].credits < qs->fl[1].size - 16) 836 __refill_fl(sc, &qs->fl[1]); 837 838 if (status & (1 << qs->rspq.cntxt_id)) { 839 if (qs->rspq.credits) { 840 refill_rspq(sc, &qs->rspq, 1); 841 qs->rspq.credits--; 842 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 843 1 << qs->rspq.cntxt_id); 844 } 845 } 846 mtx_unlock(lock); 847 } 848 } 849} 850 851/** 852 * init_qset_cntxt - initialize an SGE queue set context info 853 * @qs: the queue set 854 * @id: the queue set id 855 * 856 * Initializes the TIDs and context ids for the queues of a queue set. 857 */ 858static void 859init_qset_cntxt(struct sge_qset *qs, u_int id) 860{ 861 862 qs->rspq.cntxt_id = id; 863 qs->fl[0].cntxt_id = 2 * id; 864 qs->fl[1].cntxt_id = 2 * id + 1; 865 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; 866 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; 867 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; 868 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; 869 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; 870} 871 872 873static void 874txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) 875{ 876 txq->in_use += ndesc; 877 /* 878 * XXX we don't handle stopping of queue 879 * presumably start handles this when we bump against the end 880 */ 881 txqs->gen = txq->gen; 882 txq->unacked += ndesc; 883 txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3); 884 txq->unacked &= 7; 885 txqs->pidx = txq->pidx; 886 txq->pidx += ndesc; 887 888 if (txq->pidx >= txq->size) { 889 txq->pidx -= txq->size; 890 txq->gen ^= 1; 891 } 892 893} 894 895/** 896 * calc_tx_descs - calculate the number of Tx descriptors for a packet 897 * 
@m: the packet mbufs 898 * @nsegs: the number of segments 899 * 900 * Returns the number of Tx descriptors needed for the given Ethernet 901 * packet. Ethernet packets require addition of WR and CPL headers. 902 */ 903static __inline unsigned int 904calc_tx_descs(const struct mbuf *m, int nsegs) 905{ 906 unsigned int flits; 907 908 if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt)) 909 return 1; 910 911 flits = sgl_len(nsegs) + 2; 912#ifdef TSO_SUPPORTED 913 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) 914 flits++; 915#endif 916 return flits_to_desc(flits); 917} 918 919static unsigned int 920busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq, 921 struct tx_sw_desc *stx, bus_dma_segment_t *segs, int *nsegs) 922{ 923 struct mbuf *m0; 924 int err, pktlen; 925 int i, total_len; 926 927 m0 = *m; 928 pktlen = m0->m_pkthdr.len; 929 930 m0 = *m; 931 i = 0; 932 total_len = 0; 933 while (m0) 934 { 935 i++; 936 total_len += m0->m_len; 937 m0 = m0->m_next; 938 } 939 err = bus_dmamap_create(txq->entry_tag, total_len, TX_MAX_SEGS, total_len, 0, BUS_DMA_NOWAIT, &stx->map); 940 if (err) 941 return (err); 942 err = bus_dmamap_load_mbuf(txq->entry_tag, stx->map, *m, 0); 943 if (err) 944 return (err); 945 // feed out the physical mappings 946 *nsegs = stx->map->dm_nsegs; 947 for (i=0; i<*nsegs; i++) 948 { 949 segs[i] = stx->map->dm_segs[i]; 950 } 951#ifdef DEBUG 952 if (err) { 953 int n = 0; 954 struct mbuf *mtmp = m0; 955 while(mtmp) { 956 n++; 957 mtmp = mtmp->m_next; 958 } 959 printf("map_mbufs: bus_dmamap_load_mbuf_sg failed with %d - pkthdr.len==%d nmbufs=%d\n", 960 err, m0->m_pkthdr.len, n); 961 } 962#endif 963 if (err == EFBIG) { 964 /* Too many segments, try to defrag */ 965 m0 = m_defrag(m0, M_DONTWAIT); 966 if (m0 == NULL) { 967 m_freem(*m); 968 *m = NULL; 969 return (ENOBUFS); 970 } 971 *m = m0; 972 INT3; // XXXXXXXXXXXXXXXXXX like above! 
973 } 974 975 if (err == ENOMEM) { 976 return (err); 977 } 978 979 if (err) { 980 if (cxgb_debug) 981 printf("map failure err=%d pktlen=%d\n", err, pktlen); 982 m_freem_vec(m0); 983 *m = NULL; 984 return (err); 985 } 986 987 bus_dmamap_sync(txq->entry_tag, stx->map, 0, pktlen, BUS_DMASYNC_PREWRITE); 988 stx->flags |= TX_SW_DESC_MAPPED; 989 990 return (0); 991} 992 993/** 994 * make_sgl - populate a scatter/gather list for a packet 995 * @sgp: the SGL to populate 996 * @segs: the packet dma segments 997 * @nsegs: the number of segments 998 * 999 * Generates a scatter/gather list for the buffers that make up a packet 1000 * and returns the SGL size in 8-byte words. The caller must size the SGL 1001 * appropriately. 1002 */ 1003static __inline void 1004make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs) 1005{ 1006 int i, idx; 1007 1008 for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) { 1009 if (i && idx == 0) 1010 ++sgp; 1011 1012 sgp->len[idx] = htobe32(segs[i].ds_len); 1013 sgp->addr[idx] = htobe64(segs[i].ds_addr); 1014 } 1015 1016 if (idx) 1017 sgp->len[idx] = 0; 1018} 1019 1020/** 1021 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell 1022 * @adap: the adapter 1023 * @q: the Tx queue 1024 * 1025 * Ring the doorbel if a Tx queue is asleep. There is a natural race, 1026 * where the HW is going to sleep just after we checked, however, 1027 * then the interrupt handler will detect the outstanding TX packet 1028 * and ring the doorbell for us. 1029 * 1030 * When GTS is disabled we unconditionally ring the doorbell. 
1031 */ 1032static __inline void 1033check_ring_tx_db(adapter_t *adap, struct sge_txq *q) 1034{ 1035#if USE_GTS 1036 clear_bit(TXQ_LAST_PKT_DB, &q->flags); 1037 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) { 1038 set_bit(TXQ_LAST_PKT_DB, &q->flags); 1039#ifdef T3_TRACE 1040 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d", 1041 q->cntxt_id); 1042#endif 1043 t3_write_reg(adap, A_SG_KDOORBELL, 1044 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1045 } 1046#else 1047 wmb(); /* write descriptors before telling HW */ 1048 t3_write_reg(adap, A_SG_KDOORBELL, 1049 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1050#endif 1051} 1052 1053static __inline void 1054wr_gen2(struct tx_desc *d, unsigned int gen) 1055{ 1056#if SGE_NUM_GENBITS == 2 1057 d->flit[TX_DESC_FLITS - 1] = htobe64(gen); 1058#endif 1059} 1060 1061 1062 1063/** 1064 * write_wr_hdr_sgl - write a WR header and, optionally, SGL 1065 * @ndesc: number of Tx descriptors spanned by the SGL 1066 * @txd: first Tx descriptor to be written 1067 * @txqs: txq state (generation and producer index) 1068 * @txq: the SGE Tx queue 1069 * @sgl: the SGL 1070 * @flits: number of flits to the start of the SGL in the first descriptor 1071 * @sgl_flits: the SGL size in flits 1072 * @wr_hi: top 32 bits of WR header based on WR type (big endian) 1073 * @wr_lo: low 32 bits of WR header based on WR type (big endian) 1074 * 1075 * Write a work request header and an associated SGL. If the SGL is 1076 * small enough to fit into one Tx descriptor it has already been written 1077 * and we just need to write the WR header. Otherwise we distribute the 1078 * SGL across the number of descriptors it spans. 
 */

/**
 *	write_wr_hdr_sgl - write a work request header and an SGL onto the ring
 *	@ndesc: number of Tx descriptors the request spans
 *	@txd: first Tx descriptor to write
 *	@txqs: producer state (pidx/gen/compl) for the queue
 *	@txq: the Tx queue
 *	@sgl: the scatter/gather list (already built by make_sgl)
 *	@flits: flits already consumed in the first descriptor (header/CPL)
 *	@sgl_flits: number of flits occupied by the SGL
 *	@wr_hi/@wr_lo: pre-built (network order) header words to OR in
 *
 *	Writes the work request header and copies the SGL into one or more
 *	Tx descriptors.  The generation bit of the *first* descriptor is
 *	written last (after a write barrier) so the SGE never sees a
 *	partially written multi-descriptor request.
 */
static void
write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
    const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
    unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
{

    struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
    struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];

    if (__predict_true(ndesc == 1)) {
        /* Single-descriptor request: header and SGL fit in one flit run. */
        wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
            V_WR_SGLSFLT(flits)) | wr_hi;
        wmb();          /* header body visible before the length/gen word */
        wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
            V_WR_GEN(txqs->gen)) | wr_lo;
        /* XXX gen? */
        wr_gen2(txd, txqs->gen);
    } else {
        unsigned int ogen = txqs->gen;          /* gen of the first desc */
        const uint64_t *fp = (const uint64_t *)sgl;
        struct work_request_hdr *wp = wrp;      /* remember first desc */

        /* First descriptor carries SOP but not EOP yet. */
        wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
            V_WR_SGLSFLT(flits)) | wr_hi;

        while (sgl_flits) {
            unsigned int avail = WR_FLITS - flits;

            if (avail > sgl_flits)
                avail = sgl_flits;
            memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
            sgl_flits -= avail;
            ndesc--;
            if (!sgl_flits)
                break;

            /* Advance to the next descriptor, wrapping the ring. */
            fp += avail;
            txd++;
            txsd++;
            if (++txqs->pidx == txq->size) {
                txqs->pidx = 0;
                txqs->gen ^= 1;
                txd = txq->desc;
                txsd = txq->sdesc;
            }

            /*
             * when the head of the mbuf chain
             * is freed all clusters will be freed
             * with it
             */
            txsd->m = NULL;
            wrp = (struct work_request_hdr *)txd;
            wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
                V_WR_SGLSFLT(1)) | wr_hi;
            wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
                sgl_flits + 1)) |
                V_WR_GEN(txqs->gen)) | wr_lo;
            /* Continuation descriptors may be made visible immediately. */
            wr_gen2(txd, txqs->gen);
            flits = 1;
        }
        wrp->wr_hi |= htonl(F_WR_EOP);          /* last desc gets EOP */
        wmb();          /* all continuation descs visible first */
        /* Writing the first descriptor's gen hands the WR to hardware. */
        wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
        wr_gen2((struct tx_desc *)wp, ogen);
    }
}


/*
 * sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp)
 * (Ethernet + VLAN encap + minimal IPv4 + minimal TCP headers.)
 */
#define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)

/**
 *	t3_encap - map and hand an Ethernet packet to the Tx ring
 *	@p: the egress port
 *	@m: in/out pointer to the packet (may be replaced by mapping helpers)
 *	@free_it: out flag, set to 1 when the mbuf was copied as immediate
 *	          data and the caller may free it
 *
 *	Builds a CPL_TX_PKT (or CPL_TX_PKT_LSO for TSO) work request for the
 *	packet, either as immediate data for small packets or via an SGL,
 *	and rings the doorbell.  Returns 0 on success or an errno from the
 *	DMA mapping step.
 */
int
t3_encap(struct port_info *p, struct mbuf **m, int *free_it)
{
    adapter_t *sc;
    struct mbuf *m0;
    struct sge_qset *qs;
    struct sge_txq *txq;
    struct tx_sw_desc *stx;
    struct txq_state txqs;
    unsigned int ndesc, flits, cntrl, mlen;
    int err, nsegs, tso_info = 0;

    struct work_request_hdr *wrp;
    struct tx_sw_desc *txsd;
    struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
    bus_dma_segment_t segs[TX_MAX_SEGS];
    uint32_t wr_hi, wr_lo, sgl_flits;

    struct tx_desc *txd;
    struct cpl_tx_pkt *cpl;

    m0 = *m;
    sc = p->adapter;

    DPRINTF("t3_encap port_id=%d qsidx=%d ", p->port_id, p->first_qset);

    /* port_id=1 qsid=1 txpkt_intf=2 tx_chan=0 */

    qs = &sc->sge.qs[p->first_qset];

    txq = &qs->txq[TXQ_ETH];
    stx = &txq->sdesc[txq->pidx];
    txd = &txq->desc[txq->pidx];
    cpl = (struct cpl_tx_pkt *)txd;
    mlen = m0->m_pkthdr.len;
    /* NOTE(review): bit 31 is ORed into the CPL length word here —
     * presumably a flag defined by the CPL_TX_PKT format; confirm
     * against the T3 CPL message documentation. */
    cpl->len = htonl(mlen | 0x80000000);

    DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", mlen, p->txpkt_intf, p->tx_chan);
    /*
     * XXX handle checksum, TSO, and VLAN here
     *
     */
    cntrl = V_TXPKT_INTF(p->txpkt_intf);

    /*
     * XXX need to add VLAN support for 6.x
     */
#ifdef VLAN_SUPPORTED
    if (m0->m_flags & M_VLANTAG)
        cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
    if (m0->m_pkthdr.csum_flags & (CSUM_TSO))
        tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
#endif
    if (tso_info) {
        /* TSO: emit a CPL_TX_PKT_LSO and parse the IP/TCP header
         * lengths out of the packet (copying the header to the stack
         * if it is not contiguous in the first mbuf). */
        int eth_type;
        struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *) cpl;
        struct ip *ip;
        struct tcphdr *tcp;
        char *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack? */

        txd->flit[2] = 0;
        cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
        hdr->cntrl = htonl(cntrl);

        if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
            pkthdr = &tmp[0];
            m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr);
        } else {
            pkthdr = mtod(m0, char *);
        }

#ifdef VLAN_SUPPORTED
        if (__predict_false(m0->m_flags & M_VLANTAG)) {
            eth_type = CPL_ETH_II_VLAN;
            ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
                ETHER_VLAN_ENCAP_LEN);
        } else {
            eth_type = CPL_ETH_II;
            ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
        }
#else
        eth_type = CPL_ETH_II;
        ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
#endif
        tcp = (struct tcphdr *)((uint8_t *)ip +
            sizeof(*ip));

        tso_info |= V_LSO_ETH_TYPE(eth_type) |
                    V_LSO_IPHDR_WORDS(ip->ip_hl) |
                    V_LSO_TCPHDR_WORDS(tcp->th_off);
        hdr->lso_info = htonl(tso_info);
        flits = 3;
    } else {
        cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
        cpl->cntrl = htonl(cntrl);

        if (mlen <= WR_LEN - sizeof(*cpl)) {
            /* Small packet: copy it into the descriptor as
             * immediate data; no DMA mapping needed. */
            txq_prod(txq, 1, &txqs);
            txq->sdesc[txqs.pidx].m = NULL;

            if (m0->m_len == m0->m_pkthdr.len)
                memcpy(&txd->flit[2], mtod(m0, uint8_t *), mlen);
            else
                m_copydata(m0, 0, mlen, (void *)&txd->flit[2]);

            *free_it = 1;       /* caller may free the mbuf now */
            flits = (mlen + 7) / 8 + 2;
            cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
                V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
                F_WR_SOP | F_WR_EOP | txqs.compl);
            wmb();      /* payload visible before length/gen word */
            cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
                V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));

            wr_gen2(txd, txqs.gen);
            check_ring_tx_db(sc, txq);
            return (0);
        }
        flits = 2;
    }

    /* Large packet: DMA-map the chain and build an SGL. */
    wrp = (struct work_request_hdr *)txd;

    if ((err = busdma_map_mbufs(m, txq, stx, segs, &nsegs)) != 0) {
        return (err);
    }
    m0 = *m;
    ndesc = calc_tx_descs(m0, nsegs);

    /* SGL goes inline after the header when it fits in one desc. */
    sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
    make_sgl(sgp, segs, nsegs);

    sgl_flits = sgl_len(nsegs);

    DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
    txq_prod(txq, ndesc, &txqs);
    txsd = &txq->sdesc[txqs.pidx];
    wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
    wr_lo = htonl(V_WR_TID(txq->token));
    txsd->m = m0;
    /* Stash the producer index in the mbuf so free_tx_desc can
     * sanity-check reclamation later. */
    m_set_priority(m0, txqs.pidx);

    write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
    check_ring_tx_db(p->adapter, txq);

    return (0);
}


/**
 *	write_imm - write a packet into a Tx descriptor as immediate data
 *	@d: the Tx descriptor to write
 *	@m: the packet
 *	@len: the length of packet data to write as immediate data
 *	@gen: the generation bit value to write
 *
 *	Writes a packet as immediate data into a Tx descriptor.  The packet
 *	contains a work request at its beginning.  We must write the packet
 *	carefully so the SGE doesn't read accidentally before it's written in
 *	its entirety.
 */
static __inline void
write_imm(struct tx_desc *d, struct mbuf *m,
    unsigned int len, unsigned int gen)
{
    struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
    struct work_request_hdr *to = (struct work_request_hdr *)d;

    /* Copy everything after the WR header, then the header words. */
    memcpy(&to[1], &from[1], len - sizeof(*from));
    to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
        V_WR_BCNTLFLT(len & 7));
    wmb();      /* body and wr_hi visible before the length/gen word */
    to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
        V_WR_LEN((len + 7) / 8));
    wr_gen2(d, gen);
    m_freem(m);     /* data is now in the descriptor; mbuf consumed */
}

/**
 *	check_desc_avail - check descriptor availability on a send queue
 *	@adap: the adapter
 *	@q: the TX queue
 *	@m: the packet needing the descriptors
 *	@ndesc: the number of Tx descriptors needed
 *	@qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 *
 *	Checks if the requested number of Tx descriptors is available on an
 *	SGE send queue.  If the queue is already suspended or not enough
 *	descriptors are available the packet is queued for later transmission.
 *	Must be called with the Tx queue locked.
 *
 *	Returns 0 if enough descriptors are available, 1 if there aren't
 *	enough descriptors and the packet has been queued, and 2 if the caller
 *	needs to retry because there weren't enough descriptors at the
 *	beginning of the call but some freed up in the mean time.
 */
static __inline int
check_desc_avail(adapter_t *adap, struct sge_txq *q,
    struct mbuf *m, unsigned int ndesc,
    unsigned int qid)
{
    /*
     * XXX We currently only use this for checking the control queue
     * the control queue is only used for binding qsets which happens
     * at init time so we are guaranteed enough descriptors
     */
    if (__predict_false(!mbufq_empty(&q->sendq))) {
        /* Keep ordering: earlier packets are already queued. */
addq_exit:  mbufq_tail(&q->sendq, m);
        return 1;
    }
    if (__predict_false(q->size - q->in_use < ndesc)) {

        struct sge_qset *qs = txq_to_qset(q, qid);

        /* Mark the queue stopped, then re-check after the barrier in
         * case completions freed descriptors concurrently. */
        setbit(&qs->txq_stopped, qid);
        smp_mb();

        if (should_restart_tx(q) &&
            test_and_clear_bit(qid, &qs->txq_stopped))
            return 2;

        q->stops++;
        goto addq_exit;
    }
    return 0;
}


/**
 *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 *	@q: the SGE control Tx queue
 *
 *	This is a variant of reclaim_completed_tx() that is used for Tx queues
 *	that send only immediate data (presently just the control queues) and
 *	thus do not have any mbufs to free.
 */
static __inline void
reclaim_completed_tx_imm(struct sge_txq *q)
{
    unsigned int reclaim = q->processed - q->cleaned;

    mtx_assert(&q->lock, MA_OWNED);

    q->in_use -= reclaim;
    q->cleaned += reclaim;
}

/* Return nonzero when the packet is small enough to be sent as
 * immediate data inside a single work request. */
static __inline int
immediate(const struct mbuf *m)
{
    return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ;
}

/**
 *	ctrl_xmit - send a packet through an SGE control Tx queue
 *	@adap: the adapter
 *	@q: the control queue
 *	@m: the packet
 *
 *	Send a packet through an SGE control Tx queue.  Packets sent through
 *	a control queue must fit entirely as immediate data in a single Tx
 *	descriptor and have no page fragments.
 */
static int
ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
{
    int ret;
    struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);

    if (__predict_false(!immediate(m))) {
        /* NOTE(review): oversized control packets are silently
         * dropped (freed, return 0); callers get no error. */
        m_freem(m);
        return 0;
    }

    wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
    wrp->wr_lo = htonl(V_WR_TID(q->token));

    mtx_lock(&q->lock);
again:  reclaim_completed_tx_imm(q);

    ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
    if (__predict_false(ret)) {
        if (ret == 1) {
            /* Packet was queued on sendq for later restart. */
            mtx_unlock(&q->lock);
            return (-1);
        }
        goto again;     /* ret == 2: descriptors freed up, retry */
    }

    write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

    q->in_use++;
    if (++q->pidx >= q->size) {
        q->pidx = 0;
        q->gen ^= 1;
    }
    mtx_unlock(&q->lock);
    wmb();
    t3_write_reg(adap, A_SG_KDOORBELL,
        F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
    return (0);
}


/**
 *	restart_ctrlq - restart a suspended control queue
 *	@qs: the queue set containing the control queue
 *
 *	Resumes transmission on a suspended Tx control queue.
 */
static void
restart_ctrlq(struct work *wk, void *data)
{
    struct mbuf *m;
    struct sge_qset *qs = (struct sge_qset *)data;
    struct sge_txq *q = &qs->txq[TXQ_CTRL];
    adapter_t *adap = qs->port->adapter;

    mtx_lock(&q->lock);
again:  reclaim_completed_tx_imm(q);

    /* Drain the backlog while descriptors are available. */
    while (q->in_use < q->size &&
        (m = mbufq_dequeue(&q->sendq)) != NULL) {

        write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

        if (++q->pidx >= q->size) {
            q->pidx = 0;
            q->gen ^= 1;
        }
        q->in_use++;
    }
    if (!mbufq_empty(&q->sendq)) {
        /* Still backlogged: re-suspend, but retry once if
         * completions raced with us (same pattern as
         * check_desc_avail). */
        setbit(&qs->txq_stopped, TXQ_CTRL);
        smp_mb();

        if (should_restart_tx(q) &&
            test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
            goto again;
        q->stops++;
    }
    mtx_unlock(&q->lock);
    t3_write_reg(adap, A_SG_KDOORBELL,
        F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}


/*
 * Send a management message through control queue 0
 */
int
t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
{
    return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
}

/**
 *	t3_free_qset - free the resources of an SGE queue set
 *	@sc: the controller owning the queue set
 *	@q: the queue set
 *
 *	Release the HW and SW resources associated with an SGE queue set, such
 *	as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 *	queue set must be quiesced prior to calling this.
 *
 *	NOTE(review): the teardown path is unfinished in this port — the
 *	INT3 debugger traps below fire on any real teardown, descriptor
 *	memory is never bus_dmamem_free'd, and DMA tags/locks are not
 *	destroyed (see the XXX comments).
 */
static void
t3_free_qset(adapter_t *sc, struct sge_qset *q)
{
    int i;

    for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
        if (q->fl[i].desc) {
            mtx_lock(&sc->sge.reg_lock);
            t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
            mtx_unlock(&sc->sge.reg_lock);
            bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
            INT3;   /* unfinished: see note above */
/*          bus_dmamem_free(q->fl[i].desc_tag, &q->fl[i].phys_addr, 1); */
            /* XXX destroy DMA tags???? */
        }
        if (q->fl[i].sdesc) {
            free_rx_bufs(sc, &q->fl[i]);
            free(q->fl[i].sdesc, M_DEVBUF);
        }
    }

    for (i = 0; i < SGE_TXQ_PER_SET; i++) {
        if (q->txq[i].desc) {
            mtx_lock(&sc->sge.reg_lock);
            t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
            mtx_unlock(&sc->sge.reg_lock);
            bus_dmamap_unload(q->txq[i].desc_tag,
                q->txq[i].desc_map);
            INT3;   /* unfinished: see note above */
/*          bus_dmamem_free(q->txq[i].desc_tag, &q->txq[i].phys_addr, 1); */
            /* XXX destroy DMA tags???? And the lock?!??! */

        }
        if (q->txq[i].sdesc) {
            free(q->txq[i].sdesc, M_DEVBUF);
        }
    }

    if (q->rspq.desc) {
        mtx_lock(&sc->sge.reg_lock);
        t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
        mtx_unlock(&sc->sge.reg_lock);

        bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
        INT3;   /* unfinished: see note above */
/*      bus_dmamem_free(q->rspq.desc_tag, &q->rspq.phys_addr, 1); */
        /* XXX destroy DMA tags???? and the LOCK ?!?!? */
    }

    memset(q, 0, sizeof(*q));
}

/**
 *	t3_free_sge_resources - free SGE resources
 *	@sc: the adapter softc
 *
 *	Frees resources used by the SGE queue sets.
 */
void
t3_free_sge_resources(adapter_t *sc)
{
    int i, nqsets;

    for (nqsets = i = 0; i < (sc)->params.nports; i++)
        nqsets += sc->port[i].nqsets;

    for (i = 0; i < nqsets; ++i)
        t3_free_qset(sc, &sc->sge.qs[i]);
}

/**
 *	t3_sge_start - enable SGE
 *	@sc: the controller softc
 *
 *	Enables the SGE for DMAs.  This is the last step in starting packet
 *	transfers.
 */
void
t3_sge_start(adapter_t *sc)
{
    t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
}

/**
 *	t3_sge_stop - disable SGE operation
 *	@sc: the adapter
 *
 *	Disables the DMA engine.  This can be called in emergencies (e.g.,
 *	from error interrupts) or from normal process context.  In the latter
 *	case it also disables any pending queue restart tasklets.  Note that
 *	if it is called in interrupt context it cannot disable the restart
 *	tasklets as it cannot wait, however the tasklets will have no effect
 *	since the doorbells are disabled and the driver will call this again
 *	later from process context, at which time the tasklets will be stopped
 *	if they are still running.
 */
void
t3_sge_stop(adapter_t *sc)
{
    int i, nqsets;

    t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);

    for (nqsets = i = 0; i < (sc)->params.nports; i++)
        nqsets += sc->port[i].nqsets;

    for (i = 0; i < nqsets; ++i) {
        /* XXX empty: per the comment above, the restart tasklets
         * should be cancelled here; nothing is done yet. */
    }
}


/**
 *	free_tx_desc - reclaims Tx descriptors and their buffers
 *	@adapter: the adapter
 *	@q: the Tx queue to reclaim descriptors from
 *	@n: the number of descriptors to reclaim
 *
 *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 *	Tx buffers.  Called with the Tx queue lock held.
 */
int
free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec)
{
    struct tx_sw_desc *d;
    unsigned int cidx = q->cidx;
    int nbufs = 0;              /* number of mbufs handed back in m_vec */

#ifdef T3_TRACE
    T3_TRACE2(sc->tb[q->cntxt_id & 7],
        "reclaiming %u Tx descriptors at cidx %u", n, cidx);
#endif
    d = &q->sdesc[cidx];

    while (n-- > 0) {
        DPRINTF("cidx=%d d=%p\n", cidx, d);
        if (d->m) {
            if (d->flags & TX_SW_DESC_MAPPED) {
                bus_dmamap_unload(q->entry_tag, d->map);
                bus_dmamap_destroy(q->entry_tag, d->map);
                d->flags &= ~TX_SW_DESC_MAPPED;
            }
            /* The mbuf's priority was set to its pidx at encap
             * time; collect it only at the descriptor it was
             * queued on (an mbuf may span several descriptors). */
            if (m_get_priority(d->m) == cidx) {
                m_vec[nbufs] = d->m;
                d->m = NULL;
                nbufs++;
            } else {
                printf("pri=%d cidx=%d\n", (int)m_get_priority(d->m), cidx);
            }
        }
        ++d;
        if (++cidx == q->size) {
            cidx = 0;
            d = q->sdesc;
        }
    }
    q->cidx = cidx;

    return (nbufs);
}

/**
 *	is_new_response - check if a response is newly written
 *	@r: the response descriptor
 *	@q: the response queue
 *
 *	Returns true if a response descriptor contains a yet unprocessed
 *	response.  The generation bit flips each time the ring wraps.
 */
static __inline int
is_new_response(const struct rsp_desc *r,
    const struct sge_rspq *q)
{
    return (r->intr_gen & F_RSPD_GEN2) == q->gen;
}

#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
    V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
    V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
    V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))

/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
#define NOMEM_INTR_DELAY 2500

/**
 *	write_ofld_wr - write an offload work request
 *	@adap: the adapter
 *	@m: the packet to send
 *	@q: the Tx queue
 *	@pidx: index of the first Tx descriptor to write
 *	@gen: the generation value to use
 *	@ndesc: number of descriptors the packet will occupy
 *	@segs: DMA segments for the packet body
 *	@nsegs: number of DMA segments
 *
 *	Write an offload work request to send the supplied packet.  The packet
 *	data already carry the work request with most fields populated.
 */
static void
write_ofld_wr(adapter_t *adap, struct mbuf *m,
    struct sge_txq *q, unsigned int pidx,
    unsigned int gen, unsigned int ndesc,
    bus_dma_segment_t *segs, unsigned int nsegs)
{
    unsigned int sgl_flits, flits;
    struct work_request_hdr *from;
    struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
    struct tx_desc *d = &q->desc[pidx];
    struct txq_state txqs;

    if (immediate(m)) {
        /* Small packet: goes entirely into the descriptor. */
        q->sdesc[pidx].m = NULL;
        write_imm(d, m, m->m_len, gen);
        return;
    }

    /* Only TX_DATA builds SGLs */

    from = mtod(m, struct work_request_hdr *);
    INT3;   /* XXX unfinished port: SGL path never validated; traps */
    flits = 3;  /* XXX placeholder header-flit count */

    sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;

    make_sgl(sgp, segs, nsegs);
    sgl_flits = sgl_len(nsegs);

    txqs.gen = q->gen;
    txqs.pidx = q->pidx;
    /* Request a completion every 8 unacknowledged WRs. */
    txqs.compl = (q->unacked & 8) << (S_WR_COMPL - 3);
    write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
        from->wr_hi, from->wr_lo);
}

/**
 *	calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
 *	@m: the packet
 *	@nsegs: number of DMA segments (currently unused here)
 *
 *	Returns the number of Tx descriptors needed for the given offload
 *	packet.  These packets are already fully constructed.
1756 */ 1757static __inline unsigned int 1758calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs) 1759{ 1760 unsigned int flits, cnt = 0; 1761 1762 1763 if (m->m_len <= WR_LEN) 1764 return 1; /* packet fits as immediate data */ 1765 1766 if (m->m_flags & M_IOVEC) 1767 cnt = mtomv(m)->mv_count; 1768 1769 INT3; // Debug this???? 1770 flits = 3; // XXXXXXXXX 1771 1772 return flits_to_desc(flits + sgl_len(cnt)); 1773} 1774 1775/** 1776 * ofld_xmit - send a packet through an offload queue 1777 * @adap: the adapter 1778 * @q: the Tx offload queue 1779 * @m: the packet 1780 * 1781 * Send an offload packet through an SGE offload queue. 1782 */ 1783static int 1784ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m) 1785{ 1786 int ret, nsegs; 1787 unsigned int ndesc; 1788 unsigned int pidx, gen; 1789 struct mbuf *m_vec[TX_CLEAN_MAX_DESC]; 1790 bus_dma_segment_t segs[TX_MAX_SEGS]; 1791 int i, cleaned; 1792 struct tx_sw_desc *stx = &q->sdesc[q->pidx]; 1793 1794 mtx_lock(&q->lock); 1795 if ((ret = busdma_map_mbufs(&m, q, stx, segs, &nsegs)) != 0) { 1796 mtx_unlock(&q->lock); 1797 return (ret); 1798 } 1799 ndesc = calc_tx_descs_ofld(m, nsegs); 1800again: cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec); 1801 1802 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD); 1803 if (__predict_false(ret)) { 1804 if (ret == 1) { 1805 m_set_priority(m, ndesc); /* save for restart */ 1806 mtx_unlock(&q->lock); 1807 return EINTR; 1808 } 1809 goto again; 1810 } 1811 1812 gen = q->gen; 1813 q->in_use += ndesc; 1814 pidx = q->pidx; 1815 q->pidx += ndesc; 1816 if (q->pidx >= q->size) { 1817 q->pidx -= q->size; 1818 q->gen ^= 1; 1819 } 1820#ifdef T3_TRACE 1821 T3_TRACE5(adap->tb[q->cntxt_id & 7], 1822 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u", 1823 ndesc, pidx, skb->len, skb->len - skb->data_len, 1824 skb_shinfo(skb)->nr_frags); 1825#endif 1826 mtx_unlock(&q->lock); 1827 1828 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 1829 check_ring_tx_db(adap, 
q); 1830 1831 for (i = 0; i < cleaned; i++) { 1832 m_freem_vec(m_vec[i]); 1833 } 1834 return (0); 1835} 1836 1837/** 1838 * restart_offloadq - restart a suspended offload queue 1839 * @qs: the queue set cotaining the offload queue 1840 * 1841 * Resumes transmission on a suspended Tx offload queue. 1842 */ 1843static void 1844restart_offloadq(struct work *wk, void *data) 1845{ 1846 1847 struct mbuf *m; 1848 struct sge_qset *qs = data; 1849 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 1850 adapter_t *adap = qs->port->adapter; 1851 struct mbuf *m_vec[TX_CLEAN_MAX_DESC]; 1852 bus_dma_segment_t segs[TX_MAX_SEGS]; 1853 int nsegs, i, cleaned; 1854 struct tx_sw_desc *stx = &q->sdesc[q->pidx]; 1855 1856 mtx_lock(&q->lock); 1857again: cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec); 1858 1859 while ((m = mbufq_peek(&q->sendq)) != NULL) { 1860 unsigned int gen, pidx; 1861 unsigned int ndesc = m_get_priority(m); 1862 1863 if (__predict_false(q->size - q->in_use < ndesc)) { 1864 setbit(&qs->txq_stopped, TXQ_OFLD); 1865 smp_mb(); 1866 1867 if (should_restart_tx(q) && 1868 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) 1869 goto again; 1870 q->stops++; 1871 break; 1872 } 1873 1874 gen = q->gen; 1875 q->in_use += ndesc; 1876 pidx = q->pidx; 1877 q->pidx += ndesc; 1878 if (q->pidx >= q->size) { 1879 q->pidx -= q->size; 1880 q->gen ^= 1; 1881 } 1882 1883 (void)mbufq_dequeue(&q->sendq); 1884 busdma_map_mbufs(&m, q, stx, segs, &nsegs); 1885 mtx_unlock(&q->lock); 1886 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 1887 mtx_lock(&q->lock); 1888 } 1889 mtx_unlock(&q->lock); 1890 1891#if USE_GTS 1892 set_bit(TXQ_RUNNING, &q->flags); 1893 set_bit(TXQ_LAST_PKT_DB, &q->flags); 1894#endif 1895 t3_write_reg(adap, A_SG_KDOORBELL, 1896 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1897 1898 for (i = 0; i < cleaned; i++) { 1899 m_freem_vec(m_vec[i]); 1900 } 1901} 1902 1903/** 1904 * queue_set - return the queue set a packet should use 1905 * @m: the packet 1906 * 1907 * Maps a packet 
to the SGE queue set it should use.  The desired queue
 *	set is carried in bits 1-3 in the packet's priority.
 */
static __inline int
queue_set(const struct mbuf *m)
{
    return m_get_priority(m) >> 1;
}

/**
 *	is_ctrl_pkt - return whether an offload packet is a control packet
 *	@m: the packet
 *
 *	Determines whether an offload packet should use an OFLD or a CTRL
 *	Tx queue.  This is indicated by bit 0 in the packet's priority.
 */
static __inline int
is_ctrl_pkt(const struct mbuf *m)
{
    return m_get_priority(m) & 1;
}

/**
 *	t3_offload_tx - send an offload packet
 *	@tdev: the offload device to send to
 *	@m: the packet
 *
 *	Sends an offload packet.  We use the packet priority to select the
 *	appropriate Tx queue as follows: bit 0 indicates whether the packet
 *	should be sent as regular or control, bits 1-3 select the queue set.
 */
int
t3_offload_tx(struct toedev *tdev, struct mbuf *m)
{
    adapter_t *adap = tdev2adap(tdev);
    struct sge_qset *qs = &adap->sge.qs[queue_set(m)];

    if (__predict_false(is_ctrl_pkt(m)))
        return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m);

    return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m);
}

/*
 * Re-enqueue the resume work for any offload/control Tx queue that was
 * stopped for lack of descriptors and now has room again.
 */
static void
restart_tx(struct sge_qset *qs)
{
    if (isset(&qs->txq_stopped, TXQ_OFLD) &&
        should_restart_tx(&qs->txq[TXQ_OFLD]) &&
        test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
        qs->txq[TXQ_OFLD].restarts++;
        workqueue_enqueue(qs->txq[TXQ_OFLD].qresume_task.wq, &qs->txq[TXQ_OFLD].qresume_task.w, NULL);
    }
    if (isset(&qs->txq_stopped, TXQ_CTRL) &&
        should_restart_tx(&qs->txq[TXQ_CTRL]) &&
        test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
        qs->txq[TXQ_CTRL].restarts++;
        workqueue_enqueue(qs->txq[TXQ_CTRL].qresume_task.wq, &qs->txq[TXQ_CTRL].qresume_task.w, NULL);
    }
}

/**
 *	t3_sge_alloc_qset - initialize an
SGE queue set
 *	@sc: the controller softc
 *	@id: the queue set id
 *	@nports: how many Ethernet ports will be using this queue set
 *	@irq_vec_idx: the IRQ vector index for response queue interrupts
 *	@p: configuration parameters for this queue set
 *	@ntxq: number of Tx queues for the queue set
 *	@pi: port info for queue set
 *
 *	Allocate resources and initialize an SGE queue set.  A queue set
 *	comprises a response queue, two Rx free-buffer queues, and up to 3
 *	Tx queues.  The Tx queues are assigned roles in the order Ethernet
 *	queue, offload queue, and control queue.
 */
int
t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
    const struct qset_params *p, int ntxq, struct port_info *pi)
{
    struct sge_qset *q = &sc->sge.qs[id];
    int i, ret = 0;

    init_qset_cntxt(q, id);

    /* Descriptor rings: two free lists, the response queue, and the
     * Tx queues.  Any failure unwinds through t3_free_qset(). */
    if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
            sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
            &q->fl[0].desc, &q->fl[0].sdesc,
            &q->fl[0].desc_tag, &q->fl[0].desc_map,
            sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
        goto err;
    }

    if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
            sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
            &q->fl[1].desc, &q->fl[1].sdesc,
            &q->fl[1].desc_tag, &q->fl[1].desc_map,
            sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
        goto err;
    }

    if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
            &q->rspq.phys_addr, &q->rspq.desc, NULL,
            &q->rspq.desc_tag, &q->rspq.desc_map,
            NULL, NULL)) != 0) {
        goto err;
    }

    for (i = 0; i < ntxq; ++i) {
        /*
         * The control queue always uses immediate data so does not
         * need to keep track of any mbufs.
         * XXX Placeholder for future TOE support.
         */
        size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);

        if ((ret = alloc_ring(sc, p->txq_size[i],
                sizeof(struct tx_desc), sz,
                &q->txq[i].phys_addr, &q->txq[i].desc,
                &q->txq[i].sdesc, &q->txq[i].desc_tag,
                &q->txq[i].desc_map,
                sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
            goto err;
        }
        mbufq_init(&q->txq[i].sendq);
        q->txq[i].gen = 1;
        q->txq[i].size = p->txq_size[i];
        snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d",
            0, irq_vec_idx, i);
        MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF);
    }

    q->txq[TXQ_ETH].port = pi;

    /* Per-queue resume/reclaim work, each served by its own kthread.
     * NOTE(review): kthread_create() return values are not checked. */
    q->txq[TXQ_OFLD].qresume_task.name = "restart_offloadq";
    q->txq[TXQ_OFLD].qresume_task.func = restart_offloadq;
    q->txq[TXQ_OFLD].qresume_task.context = q;
    kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &q->txq[TXQ_OFLD].qresume_task, NULL, "cxgb_make_task");

    q->txq[TXQ_CTRL].qresume_task.name = "restart_ctrlq";
    q->txq[TXQ_CTRL].qresume_task.func = restart_ctrlq;
    q->txq[TXQ_CTRL].qresume_task.context = q;
    kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &q->txq[TXQ_CTRL].qresume_task, NULL, "cxgb_make_task");

    q->txq[TXQ_ETH].qreclaim_task.name = "sge_txq_reclaim_handler";
    q->txq[TXQ_ETH].qreclaim_task.func = sge_txq_reclaim_handler;
    q->txq[TXQ_ETH].qreclaim_task.context = &q->txq[TXQ_ETH];
    kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &q->txq[TXQ_ETH].qreclaim_task, NULL, "cxgb_make_task");

    q->txq[TXQ_OFLD].qreclaim_task.name = "sge_txq_reclaim_handler";
    q->txq[TXQ_OFLD].qreclaim_task.func = sge_txq_reclaim_handler;
    q->txq[TXQ_OFLD].qreclaim_task.context = &q->txq[TXQ_OFLD];
    kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &q->txq[TXQ_OFLD].qreclaim_task, NULL, "cxgb_make_task");

    q->fl[0].gen = q->fl[1].gen = 1;
    q->fl[0].size = p->fl_size;
    q->fl[1].size = p->jumbo_size;

    q->rspq.gen = 1;
    q->rspq.cidx = 0;
    q->rspq.size = p->rspq_size;

    /* Stop threshold: worst-case descriptors for one maximal packet
     * per port sharing this queue set. */
    q->txq[TXQ_ETH].stop_thres = nports *
        flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);

    q->fl[0].buf_size = MCLBYTES;
    q->fl[1].buf_size = MJUMPAGESIZE;

    q->lro.enabled = lro_default;

    /* Program the hardware contexts under the register lock. */
    mtx_lock(&sc->sge.reg_lock);
    ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
        q->rspq.phys_addr, q->rspq.size,
        q->fl[0].buf_size, 1, 0);
    if (ret) {
        printf("error %d from t3_sge_init_rspcntxt\n", ret);
        goto err_unlock;
    }

    for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
        ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
            q->fl[i].phys_addr, q->fl[i].size,
            q->fl[i].buf_size, p->cong_thres, 1,
            0);
        if (ret) {
            printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
            goto err_unlock;
        }
    }

    ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
        SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
        q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
        1, 0);
    if (ret) {
        printf("error %d from t3_sge_init_ecntxt\n", ret);
        goto err_unlock;
    }

    if (ntxq > 1) {
        ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
            USE_GTS, SGE_CNTXT_OFLD, id,
            q->txq[TXQ_OFLD].phys_addr,
            q->txq[TXQ_OFLD].size, 0, 1, 0);
        if (ret) {
            printf("error %d from t3_sge_init_ecntxt\n", ret);
            goto err_unlock;
        }
    }

    if (ntxq > 2) {
        ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
            SGE_CNTXT_CTRL, id,
            q->txq[TXQ_CTRL].phys_addr,
            q->txq[TXQ_CTRL].size,
            q->txq[TXQ_CTRL].token, 1, 0);
        if (ret) {
            printf("error %d from t3_sge_init_ecntxt\n", ret);
            goto err_unlock;
        }
    }

    snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
        0, irq_vec_idx);
    MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);

    mtx_unlock(&sc->sge.reg_lock);
    t3_update_qset_coalesce(q, p);
    q->port = pi;

    /* Prime the free lists and response queue, then arm interrupts. */
    refill_fl(sc, &q->fl[0], q->fl[0].size);
    refill_fl(sc, &q->fl[1], q->fl[1].size);
    refill_rspq(sc, &q->rspq, q->rspq.size - 1);

    t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
        V_NEWTIMER(q->rspq.holdoff_tmr));

    return (0);

err_unlock:
    mtx_unlock(&sc->sge.reg_lock);
err:
    t3_free_qset(sc, q);

    return (ret);
}

/* Deliver a received Ethernet frame to the network stack. */
void
t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
{
    struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
    struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
    struct ifnet *ifp = pi->ifp;

    DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);

    /*
     * XXX need to add VLAN support for 6.x
     */
#ifdef VLAN_SUPPORTED
    if (__predict_false(cpl->vlan_valid)) {
        m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
        m->m_flags |= M_VLANTAG;
    }
#endif

    m->m_pkthdr.rcvif = ifp;
    m_explode(m);
    /*
     * adjust after conversion to mbuf chain
     */
    m_adj(m, sizeof(*cpl) + ethpad);

    (*ifp->if_input)(ifp, m);
}

/**
 *	get_packet - return the next ingress packet buffer from a free list
 *	@adap: the adapter that received the packet
 *	@drop_thres: # of remaining buffers before we start dropping packets
 *	@qs: the qset that the SGE free list holding the packet belongs to
 *	@mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
 *	@r: response descriptor
 *
 *	Get the next packet from a free list and complete setup of the
 *	sk_buff.  If the packet is small we make a copy and recycle the
 *	original buffer, otherwise we use the original buffer itself.
If a
 *	positive drop threshold is supplied packets are dropped and their
 *	buffers recycled if (a) the number of remaining buffers is under the
 *	threshold and the packet is too big to copy, or (b) the packet should
 *	be copied but there is no memory for the copy.
 */
#ifdef DISABLE_MBUF_IOVEC

/*
 * Plain-mbuf variant: the caller supplies a fresh mbuf `m`; buffers are
 * linked into a conventional chain tracked by `mh` (head/tail).  Returns
 * 1 once a complete packet (EOP seen) is assembled, 0 otherwise.
 */
static int
get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
    struct t3_mbuf_hdr *mh, struct rsp_desc *r, struct mbuf *m)
{

	unsigned int len_cq = ntohl(r->len_cq);
	/* F_RSPD_FLQ selects which of the two free lists supplied the buffer. */
	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
	uint32_t len = G_RSPD_LEN(len_cq);
	uint32_t flags = ntohl(r->flags);
	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
	int ret = 0;

	prefetch(sd->cl);

	fl->credits--;
	/* Make DMA'd receive data visible to the CPU, then drop the mapping. */
	bus_dmamap_sync(fl->entry_tag, sd->map, 0, len, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(fl->entry_tag, sd->map);

	m->m_len = len;
	m_cljset(m, sd->cl, fl->type);

	switch(sopeop) {
	case RSPQ_SOP_EOP:
		/* Entire packet in one buffer: the chain is just this mbuf. */
		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
		mh->mh_head = mh->mh_tail = m;
		m->m_pkthdr.len = len;
		m->m_flags |= M_PKTHDR;
		ret = 1;
		break;
	case RSPQ_NSOP_NEOP:
		/* Middle fragment: append to the chain in progress. */
		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
		m->m_flags &= ~M_PKTHDR;
		if (mh->mh_tail == NULL) {
			/* No SOP was seen for this packet; drop the fragment. */
			if (cxgb_debug)
				printf("discarding intermediate descriptor entry\n");
			m_freem(m);
			break;
		}
		mh->mh_tail->m_next = m;
		mh->mh_tail = m;
		mh->mh_head->m_pkthdr.len += len;
		ret = 0;
		break;
	case RSPQ_SOP:
		/* First fragment of a multi-buffer packet: start a new chain. */
		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
		m->m_pkthdr.len = len;
		mh->mh_head = mh->mh_tail = m;
		m->m_flags |= M_PKTHDR;
		ret = 0;
		break;
	case RSPQ_EOP:
		/* Last fragment: close the chain and report a complete packet. */
		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
		m->m_flags &= ~M_PKTHDR;
		mh->mh_head->m_pkthdr.len += len;

		mh->mh_tail->m_next = m;
		mh->mh_tail = m;
		ret = 1;
		break;
	}
	/* Advance the free-list consumer index, wrapping at ring end. */
	if (++fl->cidx == fl->size)
		fl->cidx = 0;

	return (ret);
}

#else
/*
 * mbuf-iovec variant: a small single-buffer packet (<= SGE_RX_COPY_THRES,
 * when recycling is enabled) is copied into `m` so its cluster can be
 * recycled in place; otherwise the cluster itself is attached or appended
 * to `m` as an iovec entry.  Returns 1 when the packet is complete (EOP),
 * 0 otherwise.
 */
static int
get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
    struct mbuf *m, struct rsp_desc *r)
{

	unsigned int len_cq = ntohl(r->len_cq);
	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
	uint32_t len = G_RSPD_LEN(len_cq);
	uint32_t flags = ntohl(r->flags);
	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
	void *cl;
	int ret = 0;

	prefetch(sd->cl);

	fl->credits--;
	bus_dmamap_sync(fl->entry_tag, sd->map, 0, len, BUS_DMASYNC_POSTREAD);

	if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
		/* Small complete packet: copy out and recycle the DMA buffer. */
		cl = mtod(m, void *);
		memcpy(cl, sd->cl, len);
		recycle_rx_buf(adap, fl, fl->cidx);
	} else {
		/* Pass the cluster itself up the stack; unmap it from DMA. */
		cl = sd->cl;
		bus_dmamap_unload(fl->entry_tag, sd->map);
	}
	switch(sopeop) {
	case RSPQ_SOP_EOP:
		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
		m->m_len = m->m_pkthdr.len = len;
		/* Attach the cluster only when it was not copied above. */
		if (cl == sd->cl)
			m_cljset(m, cl, fl->type);
		ret = 1;
		goto done;
		break;
	case RSPQ_NSOP_NEOP:
		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
		ret = 0;
		break;
	case RSPQ_SOP:
		/* First fragment: set up the iovec bookkeeping in `m`. */
		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
		m_iovinit(m);
		ret = 0;
		break;
	case RSPQ_EOP:
		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
		ret = 1;
		break;
	}
	/* Multi-fragment path: record this cluster as an iovec entry. */
	m_iovappend(m, cl, fl->buf_size, len, 0);

done:
	if (++fl->cidx == fl->size)
		fl->cidx = 0;

	return (ret);
}
#endif
/**
 *	handle_rsp_cntrl_info - handles control information in a response
 *	@qs: the queue set corresponding to the response
 *	@flags: the response control flags
 *
 *	Handles the control information of an
SGE response, such as GTS
 *	indications and completion credits for the queue set's Tx queues.
 *	HW coalesces credits, we don't do any extra SW coalescing.
 */
static __inline void
handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
{
	unsigned int credits;

#if USE_GTS
	if (flags & F_RSPD_TXQ0_GTS)
		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
#endif
	/* Ethernet Tx queue completion credits. */
	credits = G_RSPD_TXQ0_CR(flags);
	if (credits) {
		qs->txq[TXQ_ETH].processed += credits;
		/* Enough done descriptors: kick the deferred reclaim task. */
		if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC)
			workqueue_enqueue(qs->port->timer_reclaim_task.wq,
			    &qs->port->timer_reclaim_task.w, NULL);
	}

	/* Control queue completion credits. */
	credits = G_RSPD_TXQ2_CR(flags);
	if (credits)
		qs->txq[TXQ_CTRL].processed += credits;

# if USE_GTS
	if (flags & F_RSPD_TXQ1_GTS)
		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
# endif
	/* Offload queue completion credits. */
	credits = G_RSPD_TXQ1_CR(flags);
	if (credits)
		qs->txq[TXQ_OFLD].processed += credits;
}

/*
 * Doorbell check after response processing.  Deliberately a no-op in
 * this port (body is empty); the call site in process_responses() is
 * kept so the structure matches the original driver.
 */
static void
check_ring_db(adapter_t *adap, struct sge_qset *qs,
    unsigned int sleeping)
{
	;
}

/**
 *	process_responses - process responses from an SGE response queue
 *	@adap: the adapter
 *	@qs: the queue set to which the response queue belongs
 *	@budget: how many responses can be processed in this round
 *
 *	Process responses from an SGE response queue up to the supplied budget.
 *	Responses include received packets as well as credits and other events
 *	for the queues that belong to the response queue's queue set.
 *	A negative budget is effectively unlimited.
 *
 *	Additionally choose the interrupt holdoff time for the next interrupt
 *	on this queue.  If the system is under memory shortage use a fairly
 *	long delay to help recovery.
 */
static int
process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
{
	struct sge_rspq *rspq = &qs->rspq;
	struct rsp_desc *r = &rspq->desc[rspq->cidx];
	int budget_left = budget;
	unsigned int sleeping = 0;
	int lro = qs->lro.enabled;
#ifdef DEBUG
	static int last_holdoff = 0;
	if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
		printf("next_holdoff=%d\n", rspq->holdoff_tmr);
		last_holdoff = rspq->holdoff_tmr;
	}
#endif
	rspq->next_holdoff = rspq->holdoff_tmr;

	while (__predict_true(budget_left && is_new_response(r, rspq))) {
		int eth, eop = 0, ethpad = 0;
		uint32_t flags = ntohl(r->flags);
		/* First 32-bit word of the descriptor doubles as the RSS csum. */
		uint32_t rss_csum = *(const uint32_t *)r;
		uint32_t rss_hash = r->rss_hdr.rss_hash_val;

		eth = (r->rss_hdr.opcode == CPL_RX_PKT);

		if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
			/* XXX */
		} else if (flags & F_RSPD_IMM_DATA_VALID) {
			/* Packet data is embedded in the response descriptor. */
#ifdef DISABLE_MBUF_IOVEC
			if (cxgb_debug)
				printf("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n", r->rss_hdr.opcode, rspq->cidx);

			/* 0 => no mbuf available: back off with a long holdoff. */
			if(get_imm_packet(adap, r, &rspq->rspq_mh) == 0) {
				rspq->next_holdoff = NOMEM_INTR_DELAY;
				budget_left--;
				break;
			} else {
				eop = 1;
			}
#else
			struct mbuf *m = NULL;

			if (rspq->rspq_mbuf == NULL)
				rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA);
			else
				m = m_gethdr(M_DONTWAIT, MT_DATA);

			/*
			 * XXX revisit me
			 */
			if (rspq->rspq_mbuf == NULL && m == NULL) {
				/* Out of mbufs: back off with a long holdoff. */
				rspq->next_holdoff = NOMEM_INTR_DELAY;
				budget_left--;
				break;
			}
			if (get_imm_packet(adap, r, rspq->rspq_mbuf, m, flags))
				goto skip;
			eop = 1;
#endif
			rspq->imm_data++;
		} else if (r->len_cq) {
			/* Normal free-list packet; drop threshold only for Ethernet. */
			int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;

#ifdef DISABLE_MBUF_IOVEC
			struct mbuf *m;
			m = m_gethdr(M_NOWAIT, MT_DATA);

			if (m == NULL) {
				log(LOG_WARNING, "failed to get mbuf for packet\n");
				break;
			}

			eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r, m);
#else
			if (rspq->rspq_mbuf == NULL)
				rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA);
			if (rspq->rspq_mbuf == NULL) {
				log(LOG_WARNING, "failed to get mbuf for packet\n");
				break;
			}
			eop = get_packet(adap, drop_thresh, qs, rspq->rspq_mbuf, r);
#endif
			/* 2 bytes of pad so the IP header lands aligned. */
			ethpad = 2;
		} else {
			/* Credit/event-only response carrying no packet data. */
			DPRINTF("pure response\n");
			rspq->pure_rsps++;
		}

		if (flags & RSPD_CTRL_MASK) {
			sleeping |= flags & RSPD_GTS_MASK;
			handle_rsp_cntrl_info(qs, flags);
		}
#ifndef DISABLE_MBUF_IOVEC
	skip:
#endif
		/* Advance to the next response descriptor, wrapping the ring
		 * and flipping the generation bit at the end. */
		r++;
		if (__predict_false(++rspq->cidx == rspq->size)) {
			rspq->cidx = 0;
			rspq->gen ^= 1;
			r = rspq->desc;
		}

		prefetch(r);
		/* Return credits to the response queue in quarter-ring batches. */
		if (++rspq->credits >= (rspq->size / 4)) {
			refill_rspq(adap, rspq, rspq->credits);
			rspq->credits = 0;
		}

		if (eop) {
			prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *));
			prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *) + L1_CACHE_BYTES);

			if (eth) {
				/* Complete Ethernet packet: hand off via the LRO path. */
				t3_rx_eth_lro(adap, rspq, rspq->rspq_mh.mh_head, ethpad,
				    rss_hash, rss_csum, lro);

				rspq->rspq_mh.mh_head = NULL;
			} else {
				rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
				/*
				 * XXX size mismatch
				 */
				m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
			}
			__refill_fl(adap, &qs->fl[0]);
			__refill_fl(adap, &qs->fl[1]);

		}
		--budget_left;
	}

	t3_lro_flush(adap, qs, &qs->lro);

	if (sleeping)
		check_ring_db(adap, qs, sleeping);

	smp_mb();  /* commit Tx queue processed updates */
	if (__predict_false(qs->txq_stopped != 0))
		restart_tx(qs);

	/* Return the number of responses actually consumed. */
	budget -= budget_left;
	return (budget);
}

/*
 * A helper function that processes responses and issues GTS.
 */
static __inline int
process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
{
	int work;
	/* Debug-only holdoff tracker; shared across queues by design of
	 * the original driver (only used when cxgb_debug is set). */
	static int last_holdoff = 0;

	work = process_responses(adap, rspq_to_qset(rq), -1);

	if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
		printf("next_holdoff=%d\n", rq->next_holdoff);
		last_holdoff = rq->next_holdoff;
	}
	/* Acknowledge the work and re-arm the holdoff timer via GTS. */
	if (work)
		t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
		    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
	return work;
}


/*
 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
 * Handles data events from SGE response queues as well as error and other
 * async events as they all use the same interrupt pin.  We use one SGE
 * response queue per port in this mode and protect all response queues with
 * queue 0's lock.
 */
int
t3b_intr(void *data)
{
	uint32_t i, map;
	adapter_t *adap = data;
	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;

	/* Clear the interrupt, then read which queues have data pending. */
	t3_write_reg(adap, A_PL_CLI, 0);
	map = t3_read_reg(adap, A_SG_DATA_INTR);

	if (!map)
		return (FALSE);

	/* Error interrupts are handled off the fast path in a workqueue. */
	if (__predict_false(map & F_ERRINTR))
		workqueue_enqueue(adap->slow_intr_task.wq, &adap->slow_intr_task.w, NULL);

	mtx_lock(&q0->lock);
	for_each_port(adap, i)
	    if (map & (1 << i))
			process_responses_gts(adap, &adap->sge.qs[i].rspq);
	mtx_unlock(&q0->lock);

	return (TRUE);
}

/*
 * The MSI interrupt handler.  This needs to handle data events from SGE
 * response queues as well as error and other async events as they all use
 * the same MSI vector.  We use one SGE response queue per port in this mode
 * and protect all response queues with queue 0's lock.
2590 */ 2591int 2592t3_intr_msi(void *data) 2593{ 2594 adapter_t *adap = data; 2595 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 2596 int i, new_packets = 0; 2597 2598 mtx_lock(&q0->lock); 2599 2600 for_each_port(adap, i) 2601 if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 2602 new_packets = 1; 2603 mtx_unlock(&q0->lock); 2604 if (new_packets == 0) 2605 workqueue_enqueue(adap->slow_intr_task.wq, &adap->slow_intr_task.w, NULL); 2606 2607 return (TRUE); 2608} 2609 2610int 2611t3_intr_msix(void *data) 2612{ 2613 struct sge_qset *qs = data; 2614 adapter_t *adap = qs->port->adapter; 2615 struct sge_rspq *rspq = &qs->rspq; 2616 2617 mtx_lock(&rspq->lock); 2618 if (process_responses_gts(adap, rspq) == 0) 2619 rspq->unhandled_irqs++; 2620 mtx_unlock(&rspq->lock); 2621 2622 return (TRUE); 2623} 2624 2625/** 2626 * t3_get_desc - dump an SGE descriptor for debugging purposes 2627 * @qs: the queue set 2628 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx) 2629 * @idx: the descriptor index in the queue 2630 * @data: where to dump the descriptor contents 2631 * 2632 * Dumps the contents of a HW descriptor of an SGE queue. Returns the 2633 * size of the descriptor. 
2634 */ 2635int 2636t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, 2637 unsigned char *data) 2638{ 2639 if (qnum >= 6) 2640 return (EINVAL); 2641 2642 if (qnum < 3) { 2643 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size) 2644 return -EINVAL; 2645 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc)); 2646 return sizeof(struct tx_desc); 2647 } 2648 2649 if (qnum == 3) { 2650 if (!qs->rspq.desc || idx >= qs->rspq.size) 2651 return (EINVAL); 2652 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc)); 2653 return sizeof(struct rsp_desc); 2654 } 2655 2656 qnum -= 4; 2657 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size) 2658 return (EINVAL); 2659 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc)); 2660 return sizeof(struct rx_desc); 2661} 2662