cxgb_sge.c revision 194259
1/************************************************************************** 2 3Copyright (c) 2007-2009, Chelsio Inc. 4All rights reserved. 5 6Redistribution and use in source and binary forms, with or without 7modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26POSSIBILITY OF SUCH DAMAGE. 27 28***************************************************************************/ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_sge.c 194259 2009-06-15 19:50:03Z sam $"); 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/kernel.h> 36#include <sys/module.h> 37#include <sys/bus.h> 38#include <sys/conf.h> 39#include <machine/bus.h> 40#include <machine/resource.h> 41#include <sys/bus_dma.h> 42#include <sys/rman.h> 43#include <sys/queue.h> 44#include <sys/sysctl.h> 45#include <sys/taskqueue.h> 46 47#include <sys/proc.h> 48#include <sys/sbuf.h> 49#include <sys/sched.h> 50#include <sys/smp.h> 51#include <sys/systm.h> 52#include <sys/syslog.h> 53 54#include <netinet/in_systm.h> 55#include <netinet/in.h> 56#include <netinet/ip.h> 57#include <netinet/tcp.h> 58 59#include <dev/pci/pcireg.h> 60#include <dev/pci/pcivar.h> 61 62#include <vm/vm.h> 63#include <vm/pmap.h> 64 65#include <cxgb_include.h> 66#include <sys/mvec.h> 67 68int txq_fills = 0; 69/* 70 * XXX don't re-enable this until TOE stops assuming 71 * we have an m_ext 72 */ 73static int recycle_enable = 0; 74extern int cxgb_txq_buf_ring_size; 75int cxgb_cached_allocations; 76int cxgb_cached; 77int cxgb_ext_freed = 0; 78int cxgb_ext_inited = 0; 79int fl_q_size = 0; 80int jumbo_q_size = 0; 81 82extern int cxgb_use_16k_clusters; 83extern int cxgb_pcpu_cache_enable; 84extern int nmbjumbo4; 85extern int nmbjumbo9; 86extern int nmbjumbo16; 87extern int multiq_tx_enable; 88extern int coalesce_tx_enable; 89extern int wakeup_tx_thread; 90 91#define USE_GTS 0 92 93#define SGE_RX_SM_BUF_SIZE 1536 94#define SGE_RX_DROP_THRES 16 95#define SGE_RX_COPY_THRES 128 96 97/* 98 * Period of the Tx buffer reclaim timer. This timer does not need to run 99 * frequently as Tx buffers are usually reclaimed by new Tx packets. 
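 * For example, with the common FreeBSD default of hz = 1000 the period below
 * comes to hz >> 1 = 500 ticks, i.e. the reclaim timer fires roughly twice a
 * second, which is plenty given that most reclaiming happens inline on the
 * transmit path.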
100 */ 101#define TX_RECLAIM_PERIOD (hz >> 1) 102 103/* 104 * Values for sge_txq.flags 105 */ 106enum { 107 TXQ_RUNNING = 1 << 0, /* fetch engine is running */ 108 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */ 109}; 110 111struct tx_desc { 112 uint64_t flit[TX_DESC_FLITS]; 113} __packed; 114 115struct rx_desc { 116 uint32_t addr_lo; 117 uint32_t len_gen; 118 uint32_t gen2; 119 uint32_t addr_hi; 120} __packed;; 121 122struct rsp_desc { /* response queue descriptor */ 123 struct rss_header rss_hdr; 124 uint32_t flags; 125 uint32_t len_cq; 126 uint8_t imm_data[47]; 127 uint8_t intr_gen; 128} __packed; 129 130#define RX_SW_DESC_MAP_CREATED (1 << 0) 131#define TX_SW_DESC_MAP_CREATED (1 << 1) 132#define RX_SW_DESC_INUSE (1 << 3) 133#define TX_SW_DESC_MAPPED (1 << 4) 134 135#define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0) 136#define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP) 137#define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP) 138#define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP) 139 140struct tx_sw_desc { /* SW state per Tx descriptor */ 141 struct mbuf_iovec mi; 142 bus_dmamap_t map; 143 int flags; 144}; 145 146struct rx_sw_desc { /* SW state per Rx descriptor */ 147 caddr_t rxsd_cl; 148 caddr_t data; 149 bus_dmamap_t map; 150 int flags; 151}; 152 153struct txq_state { 154 unsigned int compl; 155 unsigned int gen; 156 unsigned int pidx; 157}; 158 159struct refill_fl_cb_arg { 160 int error; 161 bus_dma_segment_t seg; 162 int nseg; 163}; 164 165/* 166 * Maps a number of flits to the number of Tx descriptors that can hold them. 167 * The formula is 168 * 169 * desc = 1 + (flits - 2) / (WR_FLITS - 1). 170 * 171 * HW allows up to 4 descriptors to be combined into a WR. 172 */ 173static uint8_t flit_desc_map[] = { 174 0, 175#if SGE_NUM_GENBITS == 1 176 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 177 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 178 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 179 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 180#elif SGE_NUM_GENBITS == 2 181 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 182 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 183 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 184 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 185#else 186# error "SGE_NUM_GENBITS must be 1 or 2" 187#endif 188}; 189 190 191int cxgb_debug = 0; 192 193static void sge_timer_cb(void *arg); 194static void sge_timer_reclaim(void *arg, int ncount); 195static void sge_txq_reclaim_handler(void *arg, int ncount); 196 197/** 198 * reclaim_completed_tx - reclaims completed Tx descriptors 199 * @adapter: the adapter 200 * @q: the Tx queue to reclaim completed descriptors from 201 * 202 * Reclaims Tx descriptors that the SGE has indicated it has processed, 203 * and frees the associated buffers if possible. Called with the Tx 204 * queue's lock held. 205 */ 206static __inline int 207reclaim_completed_tx_(struct sge_txq *q, int reclaim_min) 208{ 209 int reclaim = desc_reclaimable(q); 210 211 if (reclaim < reclaim_min) 212 return (0); 213 214 mtx_assert(&q->lock, MA_OWNED); 215 if (reclaim > 0) { 216 t3_free_tx_desc(q, reclaim); 217 q->cleaned += reclaim; 218 q->in_use -= reclaim; 219 } 220 return (reclaim); 221} 222 223/** 224 * should_restart_tx - are there enough resources to restart a Tx queue? 225 * @q: the Tx queue 226 * 227 * Checks if there are enough descriptors to restart a suspended Tx queue. 
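 * Concretely, descriptors the hardware has already processed but that have
 * not yet been cleaned are counted as free, and the queue is only restarted
 * once fewer than half of its descriptors remain genuinely in use.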
228 */ 229static __inline int 230should_restart_tx(const struct sge_txq *q) 231{ 232 unsigned int r = q->processed - q->cleaned; 233 234 return q->in_use - r < (q->size >> 1); 235} 236 237/** 238 * t3_sge_init - initialize SGE 239 * @adap: the adapter 240 * @p: the SGE parameters 241 * 242 * Performs SGE initialization needed every time after a chip reset. 243 * We do not initialize any of the queue sets here, instead the driver 244 * top-level must request those individually. We also do not enable DMA 245 * here, that should be done after the queues have been set up. 246 */ 247void 248t3_sge_init(adapter_t *adap, struct sge_params *p) 249{ 250 u_int ctrl, ups; 251 252 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ 253 254 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | 255 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN | 256 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | 257 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING; 258#if SGE_NUM_GENBITS == 1 259 ctrl |= F_EGRGENCTRL; 260#endif 261 if (adap->params.rev > 0) { 262 if (!(adap->flags & (USING_MSIX | USING_MSI))) 263 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; 264 } 265 t3_write_reg(adap, A_SG_CONTROL, ctrl); 266 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | 267 V_LORCQDRBTHRSH(512)); 268 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); 269 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | 270 V_TIMEOUT(200 * core_ticks_per_usec(adap))); 271 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 272 adap->params.rev < T3_REV_C ? 1000 : 500); 273 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); 274 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); 275 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); 276 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); 277 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); 278} 279 280 281/** 282 * sgl_len - calculates the size of an SGL of the given capacity 283 * @n: the number of SGL entries 284 * 285 * Calculates the number of flits needed for a scatter/gather list that 286 * can hold the given number of entries. 287 */ 288static __inline unsigned int 289sgl_len(unsigned int n) 290{ 291 return ((3 * n) / 2 + (n & 1)); 292} 293 294/** 295 * get_imm_packet - return the next ingress packet buffer from a response 296 * @resp: the response descriptor containing the packet data 297 * 298 * Return a packet containing the immediate data of the given response. 299 */ 300static int 301get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m) 302{ 303 304 m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE; 305 m->m_ext.ext_buf = NULL; 306 m->m_ext.ext_type = 0; 307 memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE); 308 return (0); 309} 310 311static __inline u_int 312flits_to_desc(u_int n) 313{ 314 return (flit_desc_map[n]); 315} 316 317#define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \ 318 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \ 319 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \ 320 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \ 321 F_HIRCQPARITYERROR) 322#define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR) 323#define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \ 324 F_RSPQDISABLED) 325 326/** 327 * t3_sge_err_intr_handler - SGE async event interrupt handler 328 * @adapter: the adapter 329 * 330 * Interrupt handler for SGE asynchronous (non-data) events. 
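 * Parity, framing, credit-overflow and disabled-queue conditions are logged,
 * the cause bits are acknowledged by writing them back to A_SG_INT_CAUSE, and
 * only bits covered by SGE_FATALERR escalate to t3_fatal_err().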
331 */ 332void 333t3_sge_err_intr_handler(adapter_t *adapter) 334{ 335 unsigned int v, status; 336 337 status = t3_read_reg(adapter, A_SG_INT_CAUSE); 338 if (status & SGE_PARERR) 339 CH_ALERT(adapter, "SGE parity error (0x%x)\n", 340 status & SGE_PARERR); 341 if (status & SGE_FRAMINGERR) 342 CH_ALERT(adapter, "SGE framing error (0x%x)\n", 343 status & SGE_FRAMINGERR); 344 if (status & F_RSPQCREDITOVERFOW) 345 CH_ALERT(adapter, "SGE response queue credit overflow\n"); 346 347 if (status & F_RSPQDISABLED) { 348 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); 349 350 CH_ALERT(adapter, 351 "packet delivered to disabled response queue (0x%x)\n", 352 (v >> S_RSPQ0DISABLED) & 0xff); 353 } 354 355 t3_write_reg(adapter, A_SG_INT_CAUSE, status); 356 if (status & SGE_FATALERR) 357 t3_fatal_err(adapter); 358} 359 360void 361t3_sge_prep(adapter_t *adap, struct sge_params *p) 362{ 363 int i, nqsets; 364 365 nqsets = min(SGE_QSETS, mp_ncpus*4); 366 367 fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE); 368 369 while (!powerof2(fl_q_size)) 370 fl_q_size--; 371#if __FreeBSD_version >= 700111 372 if (cxgb_use_16k_clusters) 373 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE); 374 else 375 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE); 376#else 377 jumbo_q_size = min(nmbjumbo4/(3*nqsets), JUMBO_Q_SIZE); 378#endif 379 while (!powerof2(jumbo_q_size)) 380 jumbo_q_size--; 381 382 /* XXX Does ETHER_ALIGN need to be accounted for here? */ 383 p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data); 384 385 for (i = 0; i < SGE_QSETS; ++i) { 386 struct qset_params *q = p->qset + i; 387 388 if (adap->params.nports > 2) { 389 q->coalesce_usecs = 50; 390 } else { 391#ifdef INVARIANTS 392 q->coalesce_usecs = 10; 393#else 394 q->coalesce_usecs = 5; 395#endif 396 } 397 q->polling = 0; 398 q->rspq_size = RSPQ_Q_SIZE; 399 q->fl_size = fl_q_size; 400 q->jumbo_size = jumbo_q_size; 401 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; 402 q->txq_size[TXQ_OFLD] = 1024; 403 q->txq_size[TXQ_CTRL] = 256; 404 q->cong_thres = 0; 405 } 406} 407 408int 409t3_sge_alloc(adapter_t *sc) 410{ 411 412 /* The parent tag. */ 413 if (bus_dma_tag_create( NULL, /* parent */ 414 1, 0, /* algnmnt, boundary */ 415 BUS_SPACE_MAXADDR, /* lowaddr */ 416 BUS_SPACE_MAXADDR, /* highaddr */ 417 NULL, NULL, /* filter, filterarg */ 418 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 419 BUS_SPACE_UNRESTRICTED, /* nsegments */ 420 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 421 0, /* flags */ 422 NULL, NULL, /* lock, lockarg */ 423 &sc->parent_dmat)) { 424 device_printf(sc->dev, "Cannot allocate parent DMA tag\n"); 425 return (ENOMEM); 426 } 427 428 /* 429 * DMA tag for normal sized RX frames 430 */ 431 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR, 432 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, 433 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) { 434 device_printf(sc->dev, "Cannot allocate RX DMA tag\n"); 435 return (ENOMEM); 436 } 437 438 /* 439 * DMA tag for jumbo sized RX frames. 440 */ 441 if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR, 442 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES, 443 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) { 444 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n"); 445 return (ENOMEM); 446 } 447 448 /* 449 * DMA tag for TX frames. 
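 * Unlike the RX tags above, this tag allows byte alignment and up to
 * TX_MAX_SEGS scatter/gather segments per mapping, since outbound mbuf
 * chains are described to the hardware as an SGL rather than as a single
 * contiguous cluster.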
450 */ 451 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR, 452 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 453 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 454 NULL, NULL, &sc->tx_dmat)) { 455 device_printf(sc->dev, "Cannot allocate TX DMA tag\n"); 456 return (ENOMEM); 457 } 458 459 return (0); 460} 461 462int 463t3_sge_free(struct adapter * sc) 464{ 465 466 if (sc->tx_dmat != NULL) 467 bus_dma_tag_destroy(sc->tx_dmat); 468 469 if (sc->rx_jumbo_dmat != NULL) 470 bus_dma_tag_destroy(sc->rx_jumbo_dmat); 471 472 if (sc->rx_dmat != NULL) 473 bus_dma_tag_destroy(sc->rx_dmat); 474 475 if (sc->parent_dmat != NULL) 476 bus_dma_tag_destroy(sc->parent_dmat); 477 478 return (0); 479} 480 481void 482t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) 483{ 484 485 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U); 486 qs->rspq.polling = 0 /* p->polling */; 487} 488 489#if !defined(__i386__) && !defined(__amd64__) 490static void 491refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 492{ 493 struct refill_fl_cb_arg *cb_arg = arg; 494 495 cb_arg->error = error; 496 cb_arg->seg = segs[0]; 497 cb_arg->nseg = nseg; 498 499} 500#endif 501/** 502 * refill_fl - refill an SGE free-buffer list 503 * @sc: the controller softc 504 * @q: the free-list to refill 505 * @n: the number of new buffers to allocate 506 * 507 * (Re)populate an SGE free-buffer list with up to @n new packet buffers. 508 * The caller must assure that @n does not exceed the queue's capacity. 509 */ 510static void 511refill_fl(adapter_t *sc, struct sge_fl *q, int n) 512{ 513 struct rx_sw_desc *sd = &q->sdesc[q->pidx]; 514 struct rx_desc *d = &q->desc[q->pidx]; 515 struct refill_fl_cb_arg cb_arg; 516 caddr_t cl; 517 int err, count = 0; 518 int header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t); 519 520 cb_arg.error = 0; 521 while (n--) { 522 /* 523 * We only allocate a cluster, mbuf allocation happens after rx 524 */ 525 if ((cl = cxgb_cache_get(q->zone)) == NULL) { 526 log(LOG_WARNING, "Failed to allocate cluster\n"); 527 goto done; 528 } 529 530 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) { 531 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) { 532 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); 533 uma_zfree(q->zone, cl); 534 goto done; 535 } 536 sd->flags |= RX_SW_DESC_MAP_CREATED; 537 } 538#if !defined(__i386__) && !defined(__amd64__) 539 err = bus_dmamap_load(q->entry_tag, sd->map, 540 cl + header_size, q->buf_size, 541 refill_fl_cb, &cb_arg, 0); 542 543 if (err != 0 || cb_arg.error) { 544 log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error); 545 /* 546 * XXX free cluster 547 */ 548 return; 549 } 550#else 551 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)(cl + header_size)); 552#endif 553 sd->flags |= RX_SW_DESC_INUSE; 554 sd->rxsd_cl = cl; 555 sd->data = cl + header_size; 556 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff); 557 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff); 558 d->len_gen = htobe32(V_FLD_GEN1(q->gen)); 559 d->gen2 = htobe32(V_FLD_GEN2(q->gen)); 560 561 d++; 562 sd++; 563 564 if (++q->pidx == q->size) { 565 q->pidx = 0; 566 q->gen ^= 1; 567 sd = q->sdesc; 568 d = q->desc; 569 } 570 q->credits++; 571 count++; 572 } 573 574done: 575 if (count) 576 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 577} 578 579 580/** 581 * free_rx_bufs - free the Rx buffers on an SGE free list 582 * @sc: the controle softc 583 * @q: the SGE free list to clean up 584 
* 585 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from 586 * this queue should be stopped before calling this function. 587 */ 588static void 589free_rx_bufs(adapter_t *sc, struct sge_fl *q) 590{ 591 u_int cidx = q->cidx; 592 593 while (q->credits--) { 594 struct rx_sw_desc *d = &q->sdesc[cidx]; 595 596 if (d->flags & RX_SW_DESC_INUSE) { 597 bus_dmamap_unload(q->entry_tag, d->map); 598 bus_dmamap_destroy(q->entry_tag, d->map); 599 uma_zfree(q->zone, d->rxsd_cl); 600 } 601 d->rxsd_cl = NULL; 602 if (++cidx == q->size) 603 cidx = 0; 604 } 605} 606 607static __inline void 608__refill_fl(adapter_t *adap, struct sge_fl *fl) 609{ 610 refill_fl(adap, fl, min(16U, fl->size - fl->credits)); 611} 612 613static __inline void 614__refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max) 615{ 616 if ((fl->size - fl->credits) < max) 617 refill_fl(adap, fl, min(max, fl->size - fl->credits)); 618} 619 620void 621refill_fl_service(adapter_t *adap, struct sge_fl *fl) 622{ 623 __refill_fl_lt(adap, fl, 512); 624} 625 626/** 627 * recycle_rx_buf - recycle a receive buffer 628 * @adapter: the adapter 629 * @q: the SGE free list 630 * @idx: index of buffer to recycle 631 * 632 * Recycles the specified buffer on the given free list by adding it at 633 * the next available slot on the list. 634 */ 635static void 636recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx) 637{ 638 struct rx_desc *from = &q->desc[idx]; 639 struct rx_desc *to = &q->desc[q->pidx]; 640 641 q->sdesc[q->pidx] = q->sdesc[idx]; 642 to->addr_lo = from->addr_lo; // already big endian 643 to->addr_hi = from->addr_hi; // likewise 644 wmb(); 645 to->len_gen = htobe32(V_FLD_GEN1(q->gen)); 646 to->gen2 = htobe32(V_FLD_GEN2(q->gen)); 647 q->credits++; 648 649 if (++q->pidx == q->size) { 650 q->pidx = 0; 651 q->gen ^= 1; 652 } 653 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 654} 655 656static void 657alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) 658{ 659 uint32_t *addr; 660 661 addr = arg; 662 *addr = segs[0].ds_addr; 663} 664 665static int 666alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, 667 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag, 668 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag) 669{ 670 size_t len = nelem * elem_size; 671 void *s = NULL; 672 void *p = NULL; 673 int err; 674 675 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0, 676 BUS_SPACE_MAXADDR_32BIT, 677 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, 678 len, 0, NULL, NULL, tag)) != 0) { 679 device_printf(sc->dev, "Cannot allocate descriptor tag\n"); 680 return (ENOMEM); 681 } 682 683 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT, 684 map)) != 0) { 685 device_printf(sc->dev, "Cannot allocate descriptor memory\n"); 686 return (ENOMEM); 687 } 688 689 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0); 690 bzero(p, len); 691 *(void **)desc = p; 692 693 if (sw_size) { 694 len = nelem * sw_size; 695 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO); 696 *(void **)sdesc = s; 697 } 698 if (parent_entry_tag == NULL) 699 return (0); 700 701 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0, 702 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, 703 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 704 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 705 NULL, NULL, entry_tag)) != 0) { 706 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n"); 707 return (ENOMEM); 708 } 709 return (0); 710} 711 712static void 713sge_slow_intr_handler(void *arg, int 
ncount) 714{ 715 adapter_t *sc = arg; 716 717 t3_slow_intr_handler(sc); 718} 719 720/** 721 * sge_timer_cb - perform periodic maintenance of an SGE qset 722 * @data: the SGE queue set to maintain 723 * 724 * Runs periodically from a timer to perform maintenance of an SGE queue 725 * set. It performs two tasks: 726 * 727 * a) Cleans up any completed Tx descriptors that may still be pending. 728 * Normal descriptor cleanup happens when new packets are added to a Tx 729 * queue so this timer is relatively infrequent and does any cleanup only 730 * if the Tx queue has not seen any new packets in a while. We make a 731 * best effort attempt to reclaim descriptors, in that we don't wait 732 * around if we cannot get a queue's lock (which most likely is because 733 * someone else is queueing new packets and so will also handle the clean 734 * up). Since control queues use immediate data exclusively we don't 735 * bother cleaning them up here. 736 * 737 * b) Replenishes Rx queues that have run out due to memory shortage. 738 * Normally new Rx buffers are added when existing ones are consumed but 739 * when out of memory a queue can become empty. We try to add only a few 740 * buffers here, the queue will be replenished fully as these new buffers 741 * are used up if memory shortage has subsided. 742 * 743 * c) Return coalesced response queue credits in case a response queue is 744 * starved. 745 * 746 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell 747 * fifo overflows and the FW doesn't implement any recovery scheme yet. 748 */ 749static void 750sge_timer_cb(void *arg) 751{ 752 adapter_t *sc = arg; 753#ifndef IFNET_MULTIQUEUE 754 struct port_info *pi; 755 struct sge_qset *qs; 756 struct sge_txq *txq; 757 int i, j; 758 int reclaim_ofl, refill_rx; 759 760 for (i = 0; i < sc->params.nports; i++) { 761 pi = &sc->port[i]; 762 for (j = 0; j < pi->nqsets; j++) { 763 qs = &sc->sge.qs[pi->first_qset + j]; 764 txq = &qs->txq[0]; 765 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; 766 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 767 (qs->fl[1].credits < qs->fl[1].size)); 768 if (reclaim_ofl || refill_rx) { 769 taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task); 770 break; 771 } 772 } 773 } 774#endif 775 if (sc->params.nports > 2) { 776 int i; 777 778 for_each_port(sc, i) { 779 struct port_info *pi = &sc->port[i]; 780 781 t3_write_reg(sc, A_SG_KDOORBELL, 782 F_SELEGRCNTX | 783 (FW_TUNNEL_SGEEC_START + pi->first_qset)); 784 } 785 } 786 if (sc->open_device_map != 0) 787 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 788} 789 790/* 791 * This is meant to be a catch-all function to keep sge state private 792 * to sge.c 793 * 794 */ 795int 796t3_sge_init_adapter(adapter_t *sc) 797{ 798 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE); 799 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 800 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc); 801 mi_init(); 802 cxgb_cache_init(); 803 return (0); 804} 805 806int 807t3_sge_reset_adapter(adapter_t *sc) 808{ 809 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 810 return (0); 811} 812 813int 814t3_sge_init_port(struct port_info *pi) 815{ 816 TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi); 817 return (0); 818} 819 820void 821t3_sge_deinit_sw(adapter_t *sc) 822{ 823 824 mi_deinit(); 825} 826 827/** 828 * refill_rspq - replenish an SGE response queue 829 * @adapter: the adapter 830 * @q: the response queue to replenish 831 * @credits: 
how many new responses to make available 832 * 833 * Replenishes a response queue by making the supplied number of responses 834 * available to HW. 835 */ 836static __inline void 837refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) 838{ 839 840 /* mbufs are allocated on demand when a rspq entry is processed. */ 841 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, 842 V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); 843} 844 845static __inline void 846sge_txq_reclaim_(struct sge_txq *txq, int force) 847{ 848 849 if (desc_reclaimable(txq) < 16) 850 return; 851 if (mtx_trylock(&txq->lock) == 0) 852 return; 853 reclaim_completed_tx_(txq, 16); 854 mtx_unlock(&txq->lock); 855 856} 857 858static void 859sge_txq_reclaim_handler(void *arg, int ncount) 860{ 861 struct sge_txq *q = arg; 862 863 sge_txq_reclaim_(q, TRUE); 864} 865 866 867 868static void 869sge_timer_reclaim(void *arg, int ncount) 870{ 871 struct port_info *pi = arg; 872 int i, nqsets = pi->nqsets; 873 adapter_t *sc = pi->adapter; 874 struct sge_qset *qs; 875 struct sge_txq *txq; 876 struct mtx *lock; 877 878#ifdef IFNET_MULTIQUEUE 879 panic("%s should not be called with multiqueue support\n", __FUNCTION__); 880#endif 881 for (i = 0; i < nqsets; i++) { 882 qs = &sc->sge.qs[pi->first_qset + i]; 883 884 txq = &qs->txq[TXQ_OFLD]; 885 sge_txq_reclaim_(txq, FALSE); 886 887 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 888 &sc->sge.qs[0].rspq.lock; 889 890 if (mtx_trylock(lock)) { 891 /* XXX currently assume that we are *NOT* polling */ 892 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); 893 894 if (qs->fl[0].credits < qs->fl[0].size - 16) 895 __refill_fl(sc, &qs->fl[0]); 896 if (qs->fl[1].credits < qs->fl[1].size - 16) 897 __refill_fl(sc, &qs->fl[1]); 898 899 if (status & (1 << qs->rspq.cntxt_id)) { 900 if (qs->rspq.credits) { 901 refill_rspq(sc, &qs->rspq, 1); 902 qs->rspq.credits--; 903 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 904 1 << qs->rspq.cntxt_id); 905 } 906 } 907 mtx_unlock(lock); 908 } 909 } 910} 911 912/** 913 * init_qset_cntxt - initialize an SGE queue set context info 914 * @qs: the queue set 915 * @id: the queue set id 916 * 917 * Initializes the TIDs and context ids for the queues of a queue set. 
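 * The mapping is fixed: queue set @id uses response queue context @id, free
 * list contexts 2*id and 2*id + 1, and egress contexts at the firmware bases
 * (FW_TUNNEL_SGEEC_START, FW_OFLD_SGEEC_START, FW_CTRL_SGEEC_START) offset by
 * @id, so queue set 1, for example, owns free lists 2 and 3.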
 */
static void
init_qset_cntxt(struct sge_qset *qs, u_int id)
{

	qs->rspq.cntxt_id = id;
	qs->fl[0].cntxt_id = 2 * id;
	qs->fl[1].cntxt_id = 2 * id + 1;
	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;

	mbufq_init(&qs->txq[TXQ_ETH].sendq);
	mbufq_init(&qs->txq[TXQ_OFLD].sendq);
	mbufq_init(&qs->txq[TXQ_CTRL].sendq);
}


static void
txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
{
	txq->in_use += ndesc;
	/*
	 * XXX we don't handle stopping of queue
	 * presumably start handles this when we bump against the end
	 */
	txqs->gen = txq->gen;
	txq->unacked += ndesc;
	txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
	txq->unacked &= 31;
	txqs->pidx = txq->pidx;
	txq->pidx += ndesc;
#ifdef INVARIANTS
	if (((txqs->pidx > txq->cidx) &&
	    (txq->pidx < txqs->pidx) &&
	    (txq->pidx >= txq->cidx)) ||
	    ((txqs->pidx < txq->cidx) &&
	    (txq->pidx >= txq->cidx)) ||
	    ((txqs->pidx < txq->cidx) &&
	    (txq->cidx < txqs->pidx)))
		panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
		    txqs->pidx, txq->pidx, txq->cidx);
#endif
	if (txq->pidx >= txq->size) {
		txq->pidx -= txq->size;
		txq->gen ^= 1;
	}

}

/**
 * calc_tx_descs - calculate the number of Tx descriptors for a packet
 * @m: the packet mbufs
 * @nsegs: the number of segments
 *
 * Returns the number of Tx descriptors needed for the given Ethernet
 * packet.  Ethernet packets require addition of WR and CPL headers.
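 * As a worked example, a packet whose data fits within
 * WR_LEN - sizeof(struct cpl_tx_pkt) is sent as immediate data and always
 * takes one descriptor, while a 4-segment packet needs sgl_len(4) = 6 SGL
 * flits plus 2 header flits, and flit_desc_map[8] still maps that to a
 * single descriptor.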
977 */ 978static __inline unsigned int 979calc_tx_descs(const struct mbuf *m, int nsegs) 980{ 981 unsigned int flits; 982 983 if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt)) 984 return 1; 985 986 flits = sgl_len(nsegs) + 2; 987#ifdef TSO_SUPPORTED 988 if (m->m_pkthdr.csum_flags & CSUM_TSO) 989 flits++; 990#endif 991 return flits_to_desc(flits); 992} 993 994static unsigned int 995busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq, 996 struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs) 997{ 998 struct mbuf *m0; 999 int err, pktlen, pass = 0; 1000 1001retry: 1002 err = 0; 1003 m0 = *m; 1004 pktlen = m0->m_pkthdr.len; 1005#if defined(__i386__) || defined(__amd64__) 1006 if (busdma_map_sg_collapse(m, segs, nsegs) == 0) { 1007 goto done; 1008 } else 1009#endif 1010 err = bus_dmamap_load_mbuf_sg(txq->entry_tag, txsd->map, m0, segs, nsegs, 0); 1011 1012 if (err == 0) { 1013 goto done; 1014 } 1015 if (err == EFBIG && pass == 0) { 1016 pass = 1; 1017 /* Too many segments, try to defrag */ 1018 m0 = m_defrag(m0, M_DONTWAIT); 1019 if (m0 == NULL) { 1020 m_freem(*m); 1021 *m = NULL; 1022 return (ENOBUFS); 1023 } 1024 *m = m0; 1025 goto retry; 1026 } else if (err == ENOMEM) { 1027 return (err); 1028 } if (err) { 1029 if (cxgb_debug) 1030 printf("map failure err=%d pktlen=%d\n", err, pktlen); 1031 m_freem(m0); 1032 *m = NULL; 1033 return (err); 1034 } 1035done: 1036#if !defined(__i386__) && !defined(__amd64__) 1037 bus_dmamap_sync(txq->entry_tag, txsd->map, BUS_DMASYNC_PREWRITE); 1038#endif 1039 txsd->flags |= TX_SW_DESC_MAPPED; 1040 1041 return (0); 1042} 1043 1044/** 1045 * make_sgl - populate a scatter/gather list for a packet 1046 * @sgp: the SGL to populate 1047 * @segs: the packet dma segments 1048 * @nsegs: the number of segments 1049 * 1050 * Generates a scatter/gather list for the buffers that make up a packet 1051 * and returns the SGL size in 8-byte words. The caller must size the SGL 1052 * appropriately. 1053 */ 1054static __inline void 1055make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs) 1056{ 1057 int i, idx; 1058 1059 for (idx = 0, i = 0; i < nsegs; i++) { 1060 /* 1061 * firmware doesn't like empty segments 1062 */ 1063 if (segs[i].ds_len == 0) 1064 continue; 1065 if (i && idx == 0) 1066 ++sgp; 1067 1068 sgp->len[idx] = htobe32(segs[i].ds_len); 1069 sgp->addr[idx] = htobe64(segs[i].ds_addr); 1070 idx ^= 1; 1071 } 1072 1073 if (idx) { 1074 sgp->len[idx] = 0; 1075 sgp->addr[idx] = 0; 1076 } 1077} 1078 1079/** 1080 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell 1081 * @adap: the adapter 1082 * @q: the Tx queue 1083 * 1084 * Ring the doorbel if a Tx queue is asleep. There is a natural race, 1085 * where the HW is going to sleep just after we checked, however, 1086 * then the interrupt handler will detect the outstanding TX packet 1087 * and ring the doorbell for us. 1088 * 1089 * When GTS is disabled we unconditionally ring the doorbell. 
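 * This file builds with USE_GTS defined to 0, so in practice the code below
 * always issues a write barrier and then writes F_SELEGRCNTX together with
 * the queue's egress context id to A_SG_KDOORBELL.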
1090 */ 1091static __inline void 1092check_ring_tx_db(adapter_t *adap, struct sge_txq *q) 1093{ 1094#if USE_GTS 1095 clear_bit(TXQ_LAST_PKT_DB, &q->flags); 1096 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) { 1097 set_bit(TXQ_LAST_PKT_DB, &q->flags); 1098#ifdef T3_TRACE 1099 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d", 1100 q->cntxt_id); 1101#endif 1102 t3_write_reg(adap, A_SG_KDOORBELL, 1103 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1104 } 1105#else 1106 wmb(); /* write descriptors before telling HW */ 1107 t3_write_reg(adap, A_SG_KDOORBELL, 1108 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1109#endif 1110} 1111 1112static __inline void 1113wr_gen2(struct tx_desc *d, unsigned int gen) 1114{ 1115#if SGE_NUM_GENBITS == 2 1116 d->flit[TX_DESC_FLITS - 1] = htobe64(gen); 1117#endif 1118} 1119 1120/** 1121 * write_wr_hdr_sgl - write a WR header and, optionally, SGL 1122 * @ndesc: number of Tx descriptors spanned by the SGL 1123 * @txd: first Tx descriptor to be written 1124 * @txqs: txq state (generation and producer index) 1125 * @txq: the SGE Tx queue 1126 * @sgl: the SGL 1127 * @flits: number of flits to the start of the SGL in the first descriptor 1128 * @sgl_flits: the SGL size in flits 1129 * @wr_hi: top 32 bits of WR header based on WR type (big endian) 1130 * @wr_lo: low 32 bits of WR header based on WR type (big endian) 1131 * 1132 * Write a work request header and an associated SGL. If the SGL is 1133 * small enough to fit into one Tx descriptor it has already been written 1134 * and we just need to write the WR header. Otherwise we distribute the 1135 * SGL across the number of descriptors it spans. 1136 */ 1137static void 1138write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs, 1139 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits, 1140 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo) 1141{ 1142 1143 struct work_request_hdr *wrp = (struct work_request_hdr *)txd; 1144 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx]; 1145 1146 if (__predict_true(ndesc == 1)) { 1147 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1148 V_WR_SGLSFLT(flits)) | wr_hi; 1149 wmb(); 1150 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) | 1151 V_WR_GEN(txqs->gen)) | wr_lo; 1152 /* XXX gen? 
*/ 1153 wr_gen2(txd, txqs->gen); 1154 1155 } else { 1156 unsigned int ogen = txqs->gen; 1157 const uint64_t *fp = (const uint64_t *)sgl; 1158 struct work_request_hdr *wp = wrp; 1159 1160 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | 1161 V_WR_SGLSFLT(flits)) | wr_hi; 1162 1163 while (sgl_flits) { 1164 unsigned int avail = WR_FLITS - flits; 1165 1166 if (avail > sgl_flits) 1167 avail = sgl_flits; 1168 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); 1169 sgl_flits -= avail; 1170 ndesc--; 1171 if (!sgl_flits) 1172 break; 1173 1174 fp += avail; 1175 txd++; 1176 txsd++; 1177 if (++txqs->pidx == txq->size) { 1178 txqs->pidx = 0; 1179 txqs->gen ^= 1; 1180 txd = txq->desc; 1181 txsd = txq->sdesc; 1182 } 1183 1184 /* 1185 * when the head of the mbuf chain 1186 * is freed all clusters will be freed 1187 * with it 1188 */ 1189 KASSERT(txsd->mi.mi_base == NULL, 1190 ("overwriting valid entry mi_base==%p", txsd->mi.mi_base)); 1191 wrp = (struct work_request_hdr *)txd; 1192 wrp->wr_hi = htonl(V_WR_DATATYPE(1) | 1193 V_WR_SGLSFLT(1)) | wr_hi; 1194 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS, 1195 sgl_flits + 1)) | 1196 V_WR_GEN(txqs->gen)) | wr_lo; 1197 wr_gen2(txd, txqs->gen); 1198 flits = 1; 1199 } 1200 wrp->wr_hi |= htonl(F_WR_EOP); 1201 wmb(); 1202 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; 1203 wr_gen2((struct tx_desc *)wp, ogen); 1204 } 1205} 1206 1207/* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */ 1208#define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20) 1209 1210#ifdef VLAN_SUPPORTED 1211#define GET_VTAG(cntrl, m) \ 1212do { \ 1213 if ((m)->m_flags & M_VLANTAG) \ 1214 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \ 1215} while (0) 1216 1217#define GET_VTAG_MI(cntrl, mi) \ 1218do { \ 1219 if ((mi)->mi_flags & M_VLANTAG) \ 1220 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((mi)->mi_ether_vtag); \ 1221} while (0) 1222#else 1223#define GET_VTAG(cntrl, m) 1224#define GET_VTAG_MI(cntrl, m) 1225#endif 1226 1227int 1228t3_encap(struct sge_qset *qs, struct mbuf **m, int count) 1229{ 1230 adapter_t *sc; 1231 struct mbuf *m0; 1232 struct sge_txq *txq; 1233 struct txq_state txqs; 1234 struct port_info *pi; 1235 unsigned int ndesc, flits, cntrl, mlen; 1236 int err, nsegs, tso_info = 0; 1237 1238 struct work_request_hdr *wrp; 1239 struct tx_sw_desc *txsd; 1240 struct sg_ent *sgp, *sgl; 1241 uint32_t wr_hi, wr_lo, sgl_flits; 1242 bus_dma_segment_t segs[TX_MAX_SEGS]; 1243 1244 struct tx_desc *txd; 1245 struct mbuf_vec *mv; 1246 struct mbuf_iovec *mi; 1247 1248 DPRINTF("t3_encap cpu=%d ", curcpu); 1249 1250 mi = NULL; 1251 pi = qs->port; 1252 sc = pi->adapter; 1253 txq = &qs->txq[TXQ_ETH]; 1254 txd = &txq->desc[txq->pidx]; 1255 txsd = &txq->sdesc[txq->pidx]; 1256 sgl = txq->txq_sgl; 1257 m0 = *m; 1258 1259 DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset); 1260 DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan); 1261 if (cxgb_debug) 1262 printf("mi_base=%p cidx=%d pidx=%d\n\n", txsd->mi.mi_base, txq->cidx, txq->pidx); 1263 1264 mtx_assert(&txq->lock, MA_OWNED); 1265 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1266/* 1267 * XXX need to add VLAN support for 6.x 1268 */ 1269#ifdef VLAN_SUPPORTED 1270 if (m0->m_pkthdr.csum_flags & (CSUM_TSO)) 1271 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); 1272#endif 1273 KASSERT(txsd->mi.mi_base == NULL, 1274 ("overwriting valid entry mi_base==%p", txsd->mi.mi_base)); 1275 if (count > 1) { 1276 if ((err = busdma_map_sg_vec(m, &m0, segs, count))) 1277 
return (err); 1278 nsegs = count; 1279 } else if ((err = busdma_map_sg_collapse(&m0, segs, &nsegs))) { 1280 if (cxgb_debug) 1281 printf("failed ... err=%d\n", err); 1282 return (err); 1283 } 1284 KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d count=%d", nsegs, count)); 1285 1286 if ((m0->m_pkthdr.len > PIO_LEN) || (count > 1)) { 1287 mi_collapse_mbuf(&txsd->mi, m0); 1288 mi = &txsd->mi; 1289 } 1290 if (count > 1) { 1291 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd; 1292 int i, fidx; 1293 struct mbuf_iovec *batchmi; 1294 1295 mv = mtomv(m0); 1296 batchmi = mv->mv_vec; 1297 1298 wrp = (struct work_request_hdr *)txd; 1299 1300 flits = count*2 + 1; 1301 txq_prod(txq, 1, &txqs); 1302 1303 for (fidx = 1, i = 0; i < count; i++, batchmi++, fidx += 2) { 1304 struct cpl_tx_pkt_batch_entry *cbe = &cpl_batch->pkt_entry[i]; 1305 1306 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1307 GET_VTAG_MI(cntrl, batchmi); 1308 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1309 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP))) 1310 cntrl |= F_TXPKT_IPCSUM_DIS; 1311 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)))) 1312 cntrl |= F_TXPKT_L4CSUM_DIS; 1313 cbe->cntrl = htonl(cntrl); 1314 cbe->len = htonl(batchmi->mi_len | 0x80000000); 1315 cbe->addr = htobe64(segs[i].ds_addr); 1316 txd->flit[fidx] |= htobe64(1 << 24); 1317 } 1318 1319 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1320 V_WR_SGLSFLT(flits)) | htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1321 wmb(); 1322 wrp->wr_lo = htonl(V_WR_LEN(flits) | 1323 V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token)); 1324 /* XXX gen? */ 1325 wr_gen2(txd, txqs.gen); 1326 check_ring_tx_db(sc, txq); 1327 1328 return (0); 1329 } else if (tso_info) { 1330 int min_size = TCPPKTHDRSIZE, eth_type, tagged; 1331 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd; 1332 struct ip *ip; 1333 struct tcphdr *tcp; 1334 char *pkthdr; 1335 1336 txd->flit[2] = 0; 1337 GET_VTAG(cntrl, m0); 1338 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); 1339 hdr->cntrl = htonl(cntrl); 1340 mlen = m0->m_pkthdr.len; 1341 hdr->len = htonl(mlen | 0x80000000); 1342 1343 DPRINTF("tso buf len=%d\n", mlen); 1344 1345 tagged = m0->m_flags & M_VLANTAG; 1346 if (!tagged) 1347 min_size -= ETHER_VLAN_ENCAP_LEN; 1348 1349 if (__predict_false(mlen < min_size)) { 1350 printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x", 1351 m0, mlen, m0->m_pkthdr.tso_segsz, 1352 m0->m_pkthdr.csum_flags, m0->m_flags); 1353 panic("tx tso packet too small"); 1354 } 1355 1356 /* Make sure that ether, ip, tcp headers are all in m0 */ 1357 if (__predict_false(m0->m_len < min_size)) { 1358 m0 = m_pullup(m0, min_size); 1359 if (__predict_false(m0 == NULL)) { 1360 /* XXX panic probably an overreaction */ 1361 panic("couldn't fit header into mbuf"); 1362 } 1363 } 1364 pkthdr = m0->m_data; 1365 1366 if (tagged) { 1367 eth_type = CPL_ETH_II_VLAN; 1368 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN + 1369 ETHER_VLAN_ENCAP_LEN); 1370 } else { 1371 eth_type = CPL_ETH_II; 1372 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN); 1373 } 1374 tcp = (struct tcphdr *)((uint8_t *)ip + 1375 sizeof(*ip)); 1376 1377 tso_info |= V_LSO_ETH_TYPE(eth_type) | 1378 V_LSO_IPHDR_WORDS(ip->ip_hl) | 1379 V_LSO_TCPHDR_WORDS(tcp->th_off); 1380 hdr->lso_info = htonl(tso_info); 1381 1382 if (__predict_false(mlen <= PIO_LEN)) { 1383 /* pkt not undersized but fits in PIO_LEN 1384 * Indicates a TSO bug at the higher levels. 
1385 * 1386 */ 1387 DPRINTF("**5592 Fix** mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x", 1388 m0, mlen, m0->m_pkthdr.tso_segsz, m0->m_pkthdr.csum_flags, m0->m_flags); 1389 txq_prod(txq, 1, &txqs); 1390 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]); 1391 m_freem(m0); 1392 m0 = NULL; 1393 flits = (mlen + 7) / 8 + 3; 1394 hdr->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1395 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1396 F_WR_SOP | F_WR_EOP | txqs.compl); 1397 wmb(); 1398 hdr->wr.wr_lo = htonl(V_WR_LEN(flits) | 1399 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1400 1401 wr_gen2(txd, txqs.gen); 1402 check_ring_tx_db(sc, txq); 1403 return (0); 1404 } 1405 flits = 3; 1406 } else { 1407 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd; 1408 1409 GET_VTAG(cntrl, m0); 1410 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1411 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP))) 1412 cntrl |= F_TXPKT_IPCSUM_DIS; 1413 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)))) 1414 cntrl |= F_TXPKT_L4CSUM_DIS; 1415 cpl->cntrl = htonl(cntrl); 1416 mlen = m0->m_pkthdr.len; 1417 cpl->len = htonl(mlen | 0x80000000); 1418 1419 if (mlen <= PIO_LEN) { 1420 txq_prod(txq, 1, &txqs); 1421 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); 1422 m_freem(m0); 1423 m0 = NULL; 1424 flits = (mlen + 7) / 8 + 2; 1425 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1426 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1427 F_WR_SOP | F_WR_EOP | txqs.compl); 1428 wmb(); 1429 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | 1430 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1431 1432 wr_gen2(txd, txqs.gen); 1433 check_ring_tx_db(sc, txq); 1434 DPRINTF("pio buf\n"); 1435 return (0); 1436 } 1437 DPRINTF("regular buf\n"); 1438 flits = 2; 1439 } 1440 wrp = (struct work_request_hdr *)txd; 1441 1442#ifdef nomore 1443 /* 1444 * XXX need to move into one of the helper routines above 1445 * 1446 */ 1447 if ((err = busdma_map_mbufs(m, txq, txsd, segs, &nsegs)) != 0) 1448 return (err); 1449 m0 = *m; 1450#endif 1451 ndesc = calc_tx_descs(m0, nsegs); 1452 1453 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl; 1454 make_sgl(sgp, segs, nsegs); 1455 1456 sgl_flits = sgl_len(nsegs); 1457 1458 DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc); 1459 txq_prod(txq, ndesc, &txqs); 1460 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1461 wr_lo = htonl(V_WR_TID(txq->token)); 1462 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo); 1463 check_ring_tx_db(pi->adapter, txq); 1464 1465 if ((m0->m_type == MT_DATA) && 1466 ((m0->m_flags & (M_EXT|M_NOFREE)) == M_EXT) && 1467 (m0->m_ext.ext_type != EXT_PACKET)) { 1468 m0->m_flags &= ~M_EXT ; 1469 cxgb_mbufs_outstanding--; 1470 m_free(m0); 1471 } 1472 1473 return (0); 1474} 1475 1476 1477/** 1478 * write_imm - write a packet into a Tx descriptor as immediate data 1479 * @d: the Tx descriptor to write 1480 * @m: the packet 1481 * @len: the length of packet data to write as immediate data 1482 * @gen: the generation bit value to write 1483 * 1484 * Writes a packet as immediate data into a Tx descriptor. The packet 1485 * contains a work request at its beginning. We must write the packet 1486 * carefully so the SGE doesn't read accidentally before it's written in 1487 * its entirety. 
1488 */ 1489static __inline void 1490write_imm(struct tx_desc *d, struct mbuf *m, 1491 unsigned int len, unsigned int gen) 1492{ 1493 struct work_request_hdr *from = mtod(m, struct work_request_hdr *); 1494 struct work_request_hdr *to = (struct work_request_hdr *)d; 1495 1496 if (len > WR_LEN) 1497 panic("len too big %d\n", len); 1498 if (len < sizeof(*from)) 1499 panic("len too small %d", len); 1500 1501 memcpy(&to[1], &from[1], len - sizeof(*from)); 1502 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP | 1503 V_WR_BCNTLFLT(len & 7)); 1504 wmb(); 1505 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) | 1506 V_WR_LEN((len + 7) / 8)); 1507 wr_gen2(d, gen); 1508 1509 /* 1510 * This check is a hack we should really fix the logic so 1511 * that this can't happen 1512 */ 1513 if (m->m_type != MT_DONTFREE) 1514 m_freem(m); 1515 1516} 1517 1518/** 1519 * check_desc_avail - check descriptor availability on a send queue 1520 * @adap: the adapter 1521 * @q: the TX queue 1522 * @m: the packet needing the descriptors 1523 * @ndesc: the number of Tx descriptors needed 1524 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) 1525 * 1526 * Checks if the requested number of Tx descriptors is available on an 1527 * SGE send queue. If the queue is already suspended or not enough 1528 * descriptors are available the packet is queued for later transmission. 1529 * Must be called with the Tx queue locked. 1530 * 1531 * Returns 0 if enough descriptors are available, 1 if there aren't 1532 * enough descriptors and the packet has been queued, and 2 if the caller 1533 * needs to retry because there weren't enough descriptors at the 1534 * beginning of the call but some freed up in the mean time. 1535 */ 1536static __inline int 1537check_desc_avail(adapter_t *adap, struct sge_txq *q, 1538 struct mbuf *m, unsigned int ndesc, 1539 unsigned int qid) 1540{ 1541 /* 1542 * XXX We currently only use this for checking the control queue 1543 * the control queue is only used for binding qsets which happens 1544 * at init time so we are guaranteed enough descriptors 1545 */ 1546 if (__predict_false(!mbufq_empty(&q->sendq))) { 1547addq_exit: mbufq_tail(&q->sendq, m); 1548 return 1; 1549 } 1550 if (__predict_false(q->size - q->in_use < ndesc)) { 1551 1552 struct sge_qset *qs = txq_to_qset(q, qid); 1553 1554 printf("stopping q\n"); 1555 1556 setbit(&qs->txq_stopped, qid); 1557 smp_mb(); 1558 1559 if (should_restart_tx(q) && 1560 test_and_clear_bit(qid, &qs->txq_stopped)) 1561 return 2; 1562 1563 q->stops++; 1564 goto addq_exit; 1565 } 1566 return 0; 1567} 1568 1569 1570/** 1571 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs 1572 * @q: the SGE control Tx queue 1573 * 1574 * This is a variant of reclaim_completed_tx() that is used for Tx queues 1575 * that send only immediate data (presently just the control queues) and 1576 * thus do not have any mbufs 1577 */ 1578static __inline void 1579reclaim_completed_tx_imm(struct sge_txq *q) 1580{ 1581 unsigned int reclaim = q->processed - q->cleaned; 1582 1583 mtx_assert(&q->lock, MA_OWNED); 1584 1585 q->in_use -= reclaim; 1586 q->cleaned += reclaim; 1587} 1588 1589static __inline int 1590immediate(const struct mbuf *m) 1591{ 1592 return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ; 1593} 1594 1595/** 1596 * ctrl_xmit - send a packet through an SGE control Tx queue 1597 * @adap: the adapter 1598 * @q: the control queue 1599 * @m: the packet 1600 * 1601 * Send a packet through an SGE control Tx queue. 
Packets sent through 1602 * a control queue must fit entirely as immediate data in a single Tx 1603 * descriptor and have no page fragments. 1604 */ 1605static int 1606ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m) 1607{ 1608 int ret; 1609 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *); 1610 1611 if (__predict_false(!immediate(m))) { 1612 m_freem(m); 1613 return 0; 1614 } 1615 1616 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP); 1617 wrp->wr_lo = htonl(V_WR_TID(q->token)); 1618 1619 mtx_lock(&q->lock); 1620again: reclaim_completed_tx_imm(q); 1621 1622 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL); 1623 if (__predict_false(ret)) { 1624 if (ret == 1) { 1625 mtx_unlock(&q->lock); 1626 log(LOG_ERR, "no desc available\n"); 1627 return (ENOSPC); 1628 } 1629 goto again; 1630 } 1631 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1632 1633 q->in_use++; 1634 if (++q->pidx >= q->size) { 1635 q->pidx = 0; 1636 q->gen ^= 1; 1637 } 1638 mtx_unlock(&q->lock); 1639 wmb(); 1640 t3_write_reg(adap, A_SG_KDOORBELL, 1641 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1642 return (0); 1643} 1644 1645 1646/** 1647 * restart_ctrlq - restart a suspended control queue 1648 * @qs: the queue set cotaining the control queue 1649 * 1650 * Resumes transmission on a suspended Tx control queue. 1651 */ 1652static void 1653restart_ctrlq(void *data, int npending) 1654{ 1655 struct mbuf *m; 1656 struct sge_qset *qs = (struct sge_qset *)data; 1657 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1658 adapter_t *adap = qs->port->adapter; 1659 1660 log(LOG_WARNING, "Restart_ctrlq in_use=%d\n", q->in_use); 1661 1662 mtx_lock(&q->lock); 1663again: reclaim_completed_tx_imm(q); 1664 1665 while (q->in_use < q->size && 1666 (m = mbufq_dequeue(&q->sendq)) != NULL) { 1667 1668 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1669 1670 if (++q->pidx >= q->size) { 1671 q->pidx = 0; 1672 q->gen ^= 1; 1673 } 1674 q->in_use++; 1675 } 1676 if (!mbufq_empty(&q->sendq)) { 1677 setbit(&qs->txq_stopped, TXQ_CTRL); 1678 smp_mb(); 1679 1680 if (should_restart_tx(q) && 1681 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) 1682 goto again; 1683 q->stops++; 1684 } 1685 mtx_unlock(&q->lock); 1686 wmb(); 1687 t3_write_reg(adap, A_SG_KDOORBELL, 1688 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1689} 1690 1691 1692/* 1693 * Send a management message through control queue 0 1694 */ 1695int 1696t3_mgmt_tx(struct adapter *adap, struct mbuf *m) 1697{ 1698 return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m); 1699} 1700 1701 1702/** 1703 * free_qset - free the resources of an SGE queue set 1704 * @sc: the controller owning the queue set 1705 * @q: the queue set 1706 * 1707 * Release the HW and SW resources associated with an SGE queue set, such 1708 * as HW contexts, packet buffers, and descriptor rings. Traffic to the 1709 * queue set must be quiesced prior to calling this. 
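 * Teardown disables each ring's hardware context under sge.reg_lock before
 * unloading and freeing its DMA resources: first the Ethernet Tx buffers,
 * buf_rings and ifqs, then the free lists, the Tx queues, the response
 * queue and the LRO state, after which the structure is zeroed.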
1710 */ 1711void 1712t3_free_qset(adapter_t *sc, struct sge_qset *q) 1713{ 1714 int i; 1715 1716 t3_free_tx_desc_all(&q->txq[TXQ_ETH]); 1717 1718 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 1719 if (q->txq[i].txq_mr != NULL) 1720 buf_ring_free(q->txq[i].txq_mr, M_DEVBUF); 1721 if (q->txq[i].txq_ifq != NULL) { 1722 ifq_delete(q->txq[i].txq_ifq); 1723 free(q->txq[i].txq_ifq, M_DEVBUF); 1724 } 1725 } 1726 1727 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 1728 if (q->fl[i].desc) { 1729 mtx_lock_spin(&sc->sge.reg_lock); 1730 t3_sge_disable_fl(sc, q->fl[i].cntxt_id); 1731 mtx_unlock_spin(&sc->sge.reg_lock); 1732 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map); 1733 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc, 1734 q->fl[i].desc_map); 1735 bus_dma_tag_destroy(q->fl[i].desc_tag); 1736 bus_dma_tag_destroy(q->fl[i].entry_tag); 1737 } 1738 if (q->fl[i].sdesc) { 1739 free_rx_bufs(sc, &q->fl[i]); 1740 free(q->fl[i].sdesc, M_DEVBUF); 1741 } 1742 } 1743 1744 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 1745 if (q->txq[i].desc) { 1746 mtx_lock_spin(&sc->sge.reg_lock); 1747 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0); 1748 mtx_unlock_spin(&sc->sge.reg_lock); 1749 bus_dmamap_unload(q->txq[i].desc_tag, 1750 q->txq[i].desc_map); 1751 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc, 1752 q->txq[i].desc_map); 1753 bus_dma_tag_destroy(q->txq[i].desc_tag); 1754 bus_dma_tag_destroy(q->txq[i].entry_tag); 1755 MTX_DESTROY(&q->txq[i].lock); 1756 } 1757 if (q->txq[i].sdesc) { 1758 free(q->txq[i].sdesc, M_DEVBUF); 1759 } 1760 } 1761 1762 if (q->rspq.desc) { 1763 mtx_lock_spin(&sc->sge.reg_lock); 1764 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id); 1765 mtx_unlock_spin(&sc->sge.reg_lock); 1766 1767 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map); 1768 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc, 1769 q->rspq.desc_map); 1770 bus_dma_tag_destroy(q->rspq.desc_tag); 1771 MTX_DESTROY(&q->rspq.lock); 1772 } 1773 1774#ifdef LRO_SUPPORTED 1775 tcp_lro_free(&q->lro.ctrl); 1776#endif 1777 1778 bzero(q, sizeof(*q)); 1779} 1780 1781/** 1782 * t3_free_sge_resources - free SGE resources 1783 * @sc: the adapter softc 1784 * 1785 * Frees resources used by the SGE queue sets. 1786 */ 1787void 1788t3_free_sge_resources(adapter_t *sc) 1789{ 1790 int i, nqsets; 1791 1792#ifdef IFNET_MULTIQUEUE 1793 panic("%s should not be called when IFNET_MULTIQUEUE is defined", __FUNCTION__); 1794#endif 1795 for (nqsets = i = 0; i < (sc)->params.nports; i++) 1796 nqsets += sc->port[i].nqsets; 1797 1798 for (i = 0; i < nqsets; ++i) 1799 t3_free_qset(sc, &sc->sge.qs[i]); 1800} 1801 1802/** 1803 * t3_sge_start - enable SGE 1804 * @sc: the controller softc 1805 * 1806 * Enables the SGE for DMAs. This is the last step in starting packet 1807 * transfers. 1808 */ 1809void 1810t3_sge_start(adapter_t *sc) 1811{ 1812 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE); 1813} 1814 1815/** 1816 * t3_sge_stop - disable SGE operation 1817 * @sc: the adapter 1818 * 1819 * Disables the DMA engine. This can be called in emeregencies (e.g., 1820 * from error interrupts) or from normal process context. In the latter 1821 * case it also disables any pending queue restart tasklets. Note that 1822 * if it is called in interrupt context it cannot disable the restart 1823 * tasklets as it cannot wait, however the tasklets will have no effect 1824 * since the doorbells are disabled and the driver will call this again 1825 * later from process context, at which time the tasklets will be stopped 1826 * if they are still running. 
1827 */ 1828void 1829t3_sge_stop(adapter_t *sc) 1830{ 1831 int i, nqsets; 1832 1833 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0); 1834 1835 if (sc->tq == NULL) 1836 return; 1837 1838 for (nqsets = i = 0; i < (sc)->params.nports; i++) 1839 nqsets += sc->port[i].nqsets; 1840#ifdef notyet 1841 /* 1842 * 1843 * XXX 1844 */ 1845 for (i = 0; i < nqsets; ++i) { 1846 struct sge_qset *qs = &sc->sge.qs[i]; 1847 1848 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 1849 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 1850 } 1851#endif 1852} 1853 1854/** 1855 * t3_free_tx_desc - reclaims Tx descriptors and their buffers 1856 * @adapter: the adapter 1857 * @q: the Tx queue to reclaim descriptors from 1858 * @reclaimable: the number of descriptors to reclaim 1859 * @m_vec_size: maximum number of buffers to reclaim 1860 * @desc_reclaimed: returns the number of descriptors reclaimed 1861 * 1862 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated 1863 * Tx buffers. Called with the Tx queue lock held. 1864 * 1865 * Returns number of buffers of reclaimed 1866 */ 1867void 1868t3_free_tx_desc(struct sge_txq *q, int reclaimable) 1869{ 1870 struct tx_sw_desc *txsd; 1871 unsigned int cidx; 1872 1873#ifdef T3_TRACE 1874 T3_TRACE2(sc->tb[q->cntxt_id & 7], 1875 "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx); 1876#endif 1877 cidx = q->cidx; 1878 txsd = &q->sdesc[cidx]; 1879 DPRINTF("reclaiming %d WR\n", reclaimable); 1880 mtx_assert(&q->lock, MA_OWNED); 1881 while (reclaimable--) { 1882 DPRINTF("cidx=%d d=%p\n", cidx, txsd); 1883 if (txsd->mi.mi_base != NULL) { 1884 if (txsd->flags & TX_SW_DESC_MAPPED) { 1885 bus_dmamap_unload(q->entry_tag, txsd->map); 1886 txsd->flags &= ~TX_SW_DESC_MAPPED; 1887 } 1888 m_freem_iovec(&txsd->mi); 1889#if 0 1890 buf_ring_scan(&q->txq_mr, txsd->mi.mi_base, __FILE__, __LINE__); 1891#endif 1892 txsd->mi.mi_base = NULL; 1893 } else 1894 q->txq_skipped++; 1895 1896 ++txsd; 1897 if (++cidx == q->size) { 1898 cidx = 0; 1899 txsd = q->sdesc; 1900 } 1901 } 1902 q->cidx = cidx; 1903 1904} 1905 1906void 1907t3_free_tx_desc_all(struct sge_txq *q) 1908{ 1909 int i; 1910 struct tx_sw_desc *txsd; 1911 1912 for (i = 0; i < q->size; i++) { 1913 txsd = &q->sdesc[i]; 1914 if (txsd->mi.mi_base != NULL) { 1915 if (txsd->flags & TX_SW_DESC_MAPPED) { 1916 bus_dmamap_unload(q->entry_tag, txsd->map); 1917 txsd->flags &= ~TX_SW_DESC_MAPPED; 1918 } 1919 m_freem_iovec(&txsd->mi); 1920 bzero(&txsd->mi, sizeof(txsd->mi)); 1921 } 1922 } 1923} 1924 1925/** 1926 * is_new_response - check if a response is newly written 1927 * @r: the response descriptor 1928 * @q: the response queue 1929 * 1930 * Returns true if a response descriptor contains a yet unprocessed 1931 * response. 1932 */ 1933static __inline int 1934is_new_response(const struct rsp_desc *r, 1935 const struct sge_rspq *q) 1936{ 1937 return (r->intr_gen & F_RSPD_GEN2) == q->gen; 1938} 1939 1940#define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) 1941#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ 1942 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ 1943 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ 1944 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) 1945 1946/* How long to delay the next interrupt in case of memory shortage, in 0.1us. 
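 * NOMEM_INTR_DELAY below is 2500 such units, i.e. the next interrupt is held
 * off for roughly 250 microseconds, giving the system a chance to free up
 * buffers.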
*/ 1947#define NOMEM_INTR_DELAY 2500 1948 1949/** 1950 * write_ofld_wr - write an offload work request 1951 * @adap: the adapter 1952 * @m: the packet to send 1953 * @q: the Tx queue 1954 * @pidx: index of the first Tx descriptor to write 1955 * @gen: the generation value to use 1956 * @ndesc: number of descriptors the packet will occupy 1957 * 1958 * Write an offload work request to send the supplied packet. The packet 1959 * data already carry the work request with most fields populated. 1960 */ 1961static void 1962write_ofld_wr(adapter_t *adap, struct mbuf *m, 1963 struct sge_txq *q, unsigned int pidx, 1964 unsigned int gen, unsigned int ndesc, 1965 bus_dma_segment_t *segs, unsigned int nsegs) 1966{ 1967 unsigned int sgl_flits, flits; 1968 struct work_request_hdr *from; 1969 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; 1970 struct tx_desc *d = &q->desc[pidx]; 1971 struct txq_state txqs; 1972 1973 if (immediate(m) && nsegs == 0) { 1974 write_imm(d, m, m->m_len, gen); 1975 return; 1976 } 1977 1978 /* Only TX_DATA builds SGLs */ 1979 from = mtod(m, struct work_request_hdr *); 1980 memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from)); 1981 1982 flits = m->m_len / 8; 1983 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl; 1984 1985 make_sgl(sgp, segs, nsegs); 1986 sgl_flits = sgl_len(nsegs); 1987 1988 txqs.gen = gen; 1989 txqs.pidx = pidx; 1990 txqs.compl = 0; 1991 1992 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits, 1993 from->wr_hi, from->wr_lo); 1994} 1995 1996/** 1997 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet 1998 * @m: the packet 1999 * 2000 * Returns the number of Tx descriptors needed for the given offload 2001 * packet. These packets are already fully constructed. 2002 */ 2003static __inline unsigned int 2004calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs) 2005{ 2006 unsigned int flits, cnt = 0; 2007 int ndescs; 2008 2009 if (m->m_len <= WR_LEN && nsegs == 0) 2010 return (1); /* packet fits as immediate data */ 2011 2012 if (m->m_flags & M_IOVEC) 2013 cnt = mtomv(m)->mv_count; 2014 else 2015 cnt = nsegs; 2016 2017 /* headers */ 2018 flits = m->m_len / 8; 2019 2020 ndescs = flits_to_desc(flits + sgl_len(cnt)); 2021 2022 return (ndescs); 2023} 2024 2025/** 2026 * ofld_xmit - send a packet through an offload queue 2027 * @adap: the adapter 2028 * @q: the Tx offload queue 2029 * @m: the packet 2030 * 2031 * Send an offload packet through an SGE offload queue. 
2032 */ 2033static int 2034ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m) 2035{ 2036 int ret, nsegs; 2037 unsigned int ndesc; 2038 unsigned int pidx, gen; 2039 bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs; 2040 struct tx_sw_desc *stx; 2041 2042 nsegs = m_get_sgllen(m); 2043 vsegs = m_get_sgl(m); 2044 ndesc = calc_tx_descs_ofld(m, nsegs); 2045 busdma_map_sgl(vsegs, segs, nsegs); 2046 2047 stx = &q->sdesc[q->pidx]; 2048 KASSERT(stx->mi.mi_base == NULL, ("mi_base set")); 2049 2050 mtx_lock(&q->lock); 2051again: reclaim_completed_tx_(q, 16); 2052 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD); 2053 if (__predict_false(ret)) { 2054 if (ret == 1) { 2055 printf("no ofld desc avail\n"); 2056 2057 m_set_priority(m, ndesc); /* save for restart */ 2058 mtx_unlock(&q->lock); 2059 return (EINTR); 2060 } 2061 goto again; 2062 } 2063 2064 gen = q->gen; 2065 q->in_use += ndesc; 2066 pidx = q->pidx; 2067 q->pidx += ndesc; 2068 if (q->pidx >= q->size) { 2069 q->pidx -= q->size; 2070 q->gen ^= 1; 2071 } 2072#ifdef T3_TRACE 2073 T3_TRACE5(adap->tb[q->cntxt_id & 7], 2074 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u", 2075 ndesc, pidx, skb->len, skb->len - skb->data_len, 2076 skb_shinfo(skb)->nr_frags); 2077#endif 2078 mtx_unlock(&q->lock); 2079 2080 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 2081 check_ring_tx_db(adap, q); 2082 return (0); 2083} 2084 2085/** 2086 * restart_offloadq - restart a suspended offload queue 2087 * @qs: the queue set cotaining the offload queue 2088 * 2089 * Resumes transmission on a suspended Tx offload queue. 2090 */ 2091static void 2092restart_offloadq(void *data, int npending) 2093{ 2094 struct mbuf *m; 2095 struct sge_qset *qs = data; 2096 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2097 adapter_t *adap = qs->port->adapter; 2098 bus_dma_segment_t segs[TX_MAX_SEGS]; 2099 struct tx_sw_desc *stx = &q->sdesc[q->pidx]; 2100 int nsegs, cleaned; 2101 2102 mtx_lock(&q->lock); 2103again: cleaned = reclaim_completed_tx_(q, 16); 2104 2105 while ((m = mbufq_peek(&q->sendq)) != NULL) { 2106 unsigned int gen, pidx; 2107 unsigned int ndesc = m_get_priority(m); 2108 2109 if (__predict_false(q->size - q->in_use < ndesc)) { 2110 setbit(&qs->txq_stopped, TXQ_OFLD); 2111 smp_mb(); 2112 2113 if (should_restart_tx(q) && 2114 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) 2115 goto again; 2116 q->stops++; 2117 break; 2118 } 2119 2120 gen = q->gen; 2121 q->in_use += ndesc; 2122 pidx = q->pidx; 2123 q->pidx += ndesc; 2124 if (q->pidx >= q->size) { 2125 q->pidx -= q->size; 2126 q->gen ^= 1; 2127 } 2128 2129 (void)mbufq_dequeue(&q->sendq); 2130 busdma_map_mbufs(&m, q, stx, segs, &nsegs); 2131 mtx_unlock(&q->lock); 2132 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 2133 mtx_lock(&q->lock); 2134 } 2135 mtx_unlock(&q->lock); 2136 2137#if USE_GTS 2138 set_bit(TXQ_RUNNING, &q->flags); 2139 set_bit(TXQ_LAST_PKT_DB, &q->flags); 2140#endif 2141 wmb(); 2142 t3_write_reg(adap, A_SG_KDOORBELL, 2143 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 2144} 2145 2146/** 2147 * queue_set - return the queue set a packet should use 2148 * @m: the packet 2149 * 2150 * Maps a packet to the SGE queue set it should use. The desired queue 2151 * set is carried in bits 1-3 in the packet's priority. 
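 *
 * For example, a priority of 5 (binary 101) has bit 0 set, so is_ctrl_pkt()
 * below reports a control packet, while 5 >> 1 == 2 selects queue set 2.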
2152 */ 2153static __inline int 2154queue_set(const struct mbuf *m) 2155{ 2156 return m_get_priority(m) >> 1; 2157} 2158 2159/** 2160 * is_ctrl_pkt - return whether an offload packet is a control packet 2161 * @m: the packet 2162 * 2163 * Determines whether an offload packet should use an OFLD or a CTRL 2164 * Tx queue. This is indicated by bit 0 in the packet's priority. 2165 */ 2166static __inline int 2167is_ctrl_pkt(const struct mbuf *m) 2168{ 2169 return m_get_priority(m) & 1; 2170} 2171 2172/** 2173 * t3_offload_tx - send an offload packet 2174 * @tdev: the offload device to send to 2175 * @m: the packet 2176 * 2177 * Sends an offload packet. We use the packet priority to select the 2178 * appropriate Tx queue as follows: bit 0 indicates whether the packet 2179 * should be sent as regular or control, bits 1-3 select the queue set. 2180 */ 2181int 2182t3_offload_tx(struct t3cdev *tdev, struct mbuf *m) 2183{ 2184 adapter_t *adap = tdev2adap(tdev); 2185 struct sge_qset *qs = &adap->sge.qs[queue_set(m)]; 2186 2187 if (__predict_false(is_ctrl_pkt(m))) 2188 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m); 2189 2190 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m); 2191} 2192 2193/** 2194 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts 2195 * @tdev: the offload device that will be receiving the packets 2196 * @q: the SGE response queue that assembled the bundle 2197 * @m: the partial bundle 2198 * @n: the number of packets in the bundle 2199 * 2200 * Delivers a (partial) bundle of Rx offload packets to an offload device. 2201 */ 2202static __inline void 2203deliver_partial_bundle(struct t3cdev *tdev, 2204 struct sge_rspq *q, 2205 struct mbuf *mbufs[], int n) 2206{ 2207 if (n) { 2208 q->offload_bundles++; 2209 cxgb_ofld_recv(tdev, mbufs, n); 2210 } 2211} 2212 2213static __inline int 2214rx_offload(struct t3cdev *tdev, struct sge_rspq *rq, 2215 struct mbuf *m, struct mbuf *rx_gather[], 2216 unsigned int gather_idx) 2217{ 2218 2219 rq->offload_pkts++; 2220 m->m_pkthdr.header = mtod(m, void *); 2221 rx_gather[gather_idx++] = m; 2222 if (gather_idx == RX_BUNDLE_SIZE) { 2223 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE); 2224 gather_idx = 0; 2225 rq->offload_bundles++; 2226 } 2227 return (gather_idx); 2228} 2229 2230static void 2231restart_tx(struct sge_qset *qs) 2232{ 2233 struct adapter *sc = qs->port->adapter; 2234 2235 2236 if (isset(&qs->txq_stopped, TXQ_OFLD) && 2237 should_restart_tx(&qs->txq[TXQ_OFLD]) && 2238 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { 2239 qs->txq[TXQ_OFLD].restarts++; 2240 DPRINTF("restarting TXQ_OFLD\n"); 2241 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2242 } 2243 DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n", 2244 qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]), 2245 qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned, 2246 qs->txq[TXQ_CTRL].in_use); 2247 2248 if (isset(&qs->txq_stopped, TXQ_CTRL) && 2249 should_restart_tx(&qs->txq[TXQ_CTRL]) && 2250 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { 2251 qs->txq[TXQ_CTRL].restarts++; 2252 DPRINTF("restarting TXQ_CTRL\n"); 2253 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2254 } 2255} 2256 2257/** 2258 * t3_sge_alloc_qset - initialize an SGE queue set 2259 * @sc: the controller softc 2260 * @id: the queue set id 2261 * @nports: how many Ethernet ports will be using this queue set 2262 * @irq_vec_idx: the IRQ vector index for response queue interrupts 2263 * @p: configuration parameters for this queue set 2264 * @ntxq: number 
of Tx queues for the queue set 2265 * @pi: port info for queue set 2266 * 2267 * Allocate resources and initialize an SGE queue set. A queue set 2268 * comprises a response queue, two Rx free-buffer queues, and up to 3 2269 * Tx queues. The Tx queues are assigned roles in the order Ethernet 2270 * queue, offload queue, and control queue. 2271 */ 2272int 2273t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 2274 const struct qset_params *p, int ntxq, struct port_info *pi) 2275{ 2276 struct sge_qset *q = &sc->sge.qs[id]; 2277 int i, header_size, ret = 0; 2278 2279 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 2280 2281 if ((q->txq[i].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size, 2282 M_DEVBUF, M_WAITOK, &q->txq[i].lock)) == NULL) { 2283 device_printf(sc->dev, "failed to allocate mbuf ring\n"); 2284 goto err; 2285 } 2286 if ((q->txq[i].txq_ifq = 2287 malloc(sizeof(struct ifaltq), M_DEVBUF, M_NOWAIT|M_ZERO)) 2288 == NULL) { 2289 device_printf(sc->dev, "failed to allocate ifq\n"); 2290 goto err; 2291 } 2292 ifq_init(q->txq[i].txq_ifq, pi->ifp); 2293 } 2294 init_qset_cntxt(q, id); 2295 q->idx = id; 2296 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 2297 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 2298 &q->fl[0].desc, &q->fl[0].sdesc, 2299 &q->fl[0].desc_tag, &q->fl[0].desc_map, 2300 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 2301 printf("error %d from alloc ring fl0\n", ret); 2302 goto err; 2303 } 2304 2305 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 2306 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 2307 &q->fl[1].desc, &q->fl[1].sdesc, 2308 &q->fl[1].desc_tag, &q->fl[1].desc_map, 2309 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 2310 printf("error %d from alloc ring fl1\n", ret); 2311 goto err; 2312 } 2313 2314 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 2315 &q->rspq.phys_addr, &q->rspq.desc, NULL, 2316 &q->rspq.desc_tag, &q->rspq.desc_map, 2317 NULL, NULL)) != 0) { 2318 printf("error %d from alloc ring rspq\n", ret); 2319 goto err; 2320 } 2321 2322 for (i = 0; i < ntxq; ++i) { 2323 /* 2324 * The control queue always uses immediate data so does not 2325 * need to keep track of any mbufs. 2326 * XXX Placeholder for future TOE support. 2327 */ 2328 size_t sz = i == TXQ_CTRL ? 
0 : sizeof(struct tx_sw_desc); 2329 2330 if ((ret = alloc_ring(sc, p->txq_size[i], 2331 sizeof(struct tx_desc), sz, 2332 &q->txq[i].phys_addr, &q->txq[i].desc, 2333 &q->txq[i].sdesc, &q->txq[i].desc_tag, 2334 &q->txq[i].desc_map, 2335 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 2336 printf("error %d from alloc ring tx %i\n", ret, i); 2337 goto err; 2338 } 2339 mbufq_init(&q->txq[i].sendq); 2340 q->txq[i].gen = 1; 2341 q->txq[i].size = p->txq_size[i]; 2342 snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d", 2343 device_get_unit(sc->dev), irq_vec_idx, i); 2344 MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF); 2345 } 2346 2347 q->txq[TXQ_ETH].port = pi; 2348 2349 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); 2350 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); 2351 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]); 2352 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]); 2353 2354 q->fl[0].gen = q->fl[1].gen = 1; 2355 q->fl[0].size = p->fl_size; 2356 q->fl[1].size = p->jumbo_size; 2357 2358 q->rspq.gen = 1; 2359 q->rspq.cidx = 0; 2360 q->rspq.size = p->rspq_size; 2361 2362 2363 header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t); 2364 q->txq[TXQ_ETH].stop_thres = nports * 2365 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 2366 2367 q->fl[0].buf_size = (MCLBYTES - header_size); 2368 q->fl[0].zone = zone_clust; 2369 q->fl[0].type = EXT_CLUSTER; 2370#if __FreeBSD_version > 800000 2371 if (cxgb_use_16k_clusters) { 2372 q->fl[1].buf_size = MJUM16BYTES - header_size; 2373 q->fl[1].zone = zone_jumbo16; 2374 q->fl[1].type = EXT_JUMBO16; 2375 } else { 2376 q->fl[1].buf_size = MJUM9BYTES - header_size; 2377 q->fl[1].zone = zone_jumbo9; 2378 q->fl[1].type = EXT_JUMBO9; 2379 } 2380#else 2381 q->fl[1].buf_size = MJUMPAGESIZE - header_size; 2382 q->fl[1].zone = zone_jumbop; 2383 q->fl[1].type = EXT_JUMBOP; 2384#endif 2385 2386#ifdef LRO_SUPPORTED 2387 /* Allocate and setup the lro_ctrl structure */ 2388 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO); 2389 ret = tcp_lro_init(&q->lro.ctrl); 2390 if (ret) { 2391 printf("error %d from tcp_lro_init\n", ret); 2392 goto err; 2393 } 2394 q->lro.ctrl.ifp = pi->ifp; 2395#endif 2396 2397 mtx_lock_spin(&sc->sge.reg_lock); 2398 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 2399 q->rspq.phys_addr, q->rspq.size, 2400 q->fl[0].buf_size, 1, 0); 2401 if (ret) { 2402 printf("error %d from t3_sge_init_rspcntxt\n", ret); 2403 goto err_unlock; 2404 } 2405 2406 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2407 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 2408 q->fl[i].phys_addr, q->fl[i].size, 2409 q->fl[i].buf_size, p->cong_thres, 1, 2410 0); 2411 if (ret) { 2412 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 2413 goto err_unlock; 2414 } 2415 } 2416 2417 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 2418 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 2419 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 2420 1, 0); 2421 if (ret) { 2422 printf("error %d from t3_sge_init_ecntxt\n", ret); 2423 goto err_unlock; 2424 } 2425 2426 if (ntxq > 1) { 2427 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 2428 USE_GTS, SGE_CNTXT_OFLD, id, 2429 q->txq[TXQ_OFLD].phys_addr, 2430 q->txq[TXQ_OFLD].size, 0, 1, 0); 2431 if (ret) { 2432 printf("error %d from t3_sge_init_ecntxt\n", ret); 2433 goto err_unlock; 2434 } 2435 } 2436 2437 if (ntxq > 
2) { 2438 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 2439 SGE_CNTXT_CTRL, id, 2440 q->txq[TXQ_CTRL].phys_addr, 2441 q->txq[TXQ_CTRL].size, 2442 q->txq[TXQ_CTRL].token, 1, 0); 2443 if (ret) { 2444 printf("error %d from t3_sge_init_ecntxt\n", ret); 2445 goto err_unlock; 2446 } 2447 } 2448 2449 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d", 2450 device_get_unit(sc->dev), irq_vec_idx); 2451 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF); 2452 2453 mtx_unlock_spin(&sc->sge.reg_lock); 2454 t3_update_qset_coalesce(q, p); 2455 q->port = pi; 2456 2457 refill_fl(sc, &q->fl[0], q->fl[0].size); 2458 refill_fl(sc, &q->fl[1], q->fl[1].size); 2459 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 2460 2461 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 2462 V_NEWTIMER(q->rspq.holdoff_tmr)); 2463 2464 return (0); 2465 2466err_unlock: 2467 mtx_unlock_spin(&sc->sge.reg_lock); 2468err: 2469 t3_free_qset(sc, q); 2470 2471 return (ret); 2472} 2473 2474/* 2475 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with 2476 * ethernet data. Hardware assistance with various checksums and any vlan tag 2477 * will also be taken into account here. 2478 */ 2479void 2480t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad) 2481{ 2482 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); 2483 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]]; 2484 struct ifnet *ifp = pi->ifp; 2485 2486 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff); 2487 2488 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment && 2489 cpl->csum_valid && cpl->csum == 0xffff) { 2490 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); 2491 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; 2492 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR); 2493 m->m_pkthdr.csum_data = 0xffff; 2494 } 2495 /* 2496 * XXX need to add VLAN support for 6.x 2497 */ 2498#ifdef VLAN_SUPPORTED 2499 if (__predict_false(cpl->vlan_valid)) { 2500 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 2501 m->m_flags |= M_VLANTAG; 2502 } 2503#endif 2504 2505 m->m_pkthdr.rcvif = ifp; 2506 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad; 2507#ifndef DISABLE_MBUF_IOVEC 2508 m_explode(m); 2509#endif 2510 /* 2511 * adjust after conversion to mbuf chain 2512 */ 2513 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad); 2514 m->m_len -= (sizeof(*cpl) + ethpad); 2515 m->m_data += (sizeof(*cpl) + ethpad); 2516} 2517 2518static void 2519ext_free_handler(void *arg1, void * arg2) 2520{ 2521 uintptr_t type = (uintptr_t)arg2; 2522 uma_zone_t zone; 2523 struct mbuf *m; 2524 2525 m = arg1; 2526 zone = m_getzonefromtype(type); 2527 m->m_ext.ext_type = (int)type; 2528 cxgb_ext_freed++; 2529 cxgb_cache_put(zone, m); 2530} 2531 2532static void 2533init_cluster_mbuf(caddr_t cl, int flags, int type, uma_zone_t zone) 2534{ 2535 struct mbuf *m; 2536 int header_size; 2537 2538 header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + 2539 sizeof(struct m_ext_) + sizeof(uint32_t); 2540 2541 bzero(cl, header_size); 2542 m = (struct mbuf *)cl; 2543 2544 cxgb_ext_inited++; 2545 SLIST_INIT(&m->m_pkthdr.tags); 2546 m->m_type = MT_DATA; 2547 m->m_flags = flags | M_NOFREE | M_EXT; 2548 m->m_data = cl + header_size; 2549 m->m_ext.ext_buf = cl; 2550 m->m_ext.ref_cnt = (uint32_t *)(cl + header_size - sizeof(uint32_t)); 2551 m->m_ext.ext_size = m_getsizefromtype(type); 2552 m->m_ext.ext_free = 
ext_free_handler; 2553#if __FreeBSD_version >= 800016 2554 m->m_ext.ext_arg1 = cl; 2555 m->m_ext.ext_arg2 = (void *)(uintptr_t)type; 2556#else 2557 m->m_ext.ext_args = (void *)(uintptr_t)type; 2558#endif 2559 m->m_ext.ext_type = EXT_EXTREF; 2560 *(m->m_ext.ref_cnt) = 1; 2561 DPRINTF("data=%p ref_cnt=%p\n", m->m_data, m->m_ext.ref_cnt); 2562} 2563 2564 2565/** 2566 * get_packet - return the next ingress packet buffer from a free list 2567 * @adap: the adapter that received the packet 2568 * @drop_thres: # of remaining buffers before we start dropping packets 2569 * @qs: the qset that the SGE free list holding the packet belongs to 2570 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain 2571 * @r: response descriptor 2572 * 2573 * Get the next packet from a free list and complete setup of the 2574 * sk_buff. If the packet is small we make a copy and recycle the 2575 * original buffer, otherwise we use the original buffer itself. If a 2576 * positive drop threshold is supplied packets are dropped and their 2577 * buffers recycled if (a) the number of remaining buffers is under the 2578 * threshold and the packet is too big to copy, or (b) the packet should 2579 * be copied but there is no memory for the copy. 2580 */ 2581#ifdef DISABLE_MBUF_IOVEC 2582 2583static int 2584get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 2585 struct t3_mbuf_hdr *mh, struct rsp_desc *r) 2586{ 2587 2588 unsigned int len_cq = ntohl(r->len_cq); 2589 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0]; 2590 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; 2591 uint32_t len = G_RSPD_LEN(len_cq); 2592 uint32_t flags = ntohl(r->flags); 2593 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 2594 caddr_t cl; 2595 struct mbuf *m, *m0; 2596 int ret = 0; 2597 2598 prefetch(sd->rxsd_cl); 2599 2600 fl->credits--; 2601 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2602 2603 if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) { 2604 if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) 2605 goto skip_recycle; 2606 cl = mtod(m0, void *); 2607 memcpy(cl, sd->data, len); 2608 recycle_rx_buf(adap, fl, fl->cidx); 2609 m = m0; 2610 m0->m_len = len; 2611 } else { 2612 skip_recycle: 2613 2614 bus_dmamap_unload(fl->entry_tag, sd->map); 2615 cl = sd->rxsd_cl; 2616 m = m0 = (struct mbuf *)cl; 2617 2618 if ((sopeop == RSPQ_SOP_EOP) || 2619 (sopeop == RSPQ_SOP)) 2620 flags = M_PKTHDR; 2621 init_cluster_mbuf(cl, flags, fl->type, fl->zone); 2622 m0->m_len = len; 2623 } 2624 switch(sopeop) { 2625 case RSPQ_SOP_EOP: 2626 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m)); 2627 mh->mh_head = mh->mh_tail = m; 2628 m->m_pkthdr.len = len; 2629 ret = 1; 2630 break; 2631 case RSPQ_NSOP_NEOP: 2632 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m)); 2633 if (mh->mh_tail == NULL) { 2634 log(LOG_ERR, "discarding intermediate descriptor entry\n"); 2635 m_freem(m); 2636 break; 2637 } 2638 mh->mh_tail->m_next = m; 2639 mh->mh_tail = m; 2640 mh->mh_head->m_pkthdr.len += len; 2641 ret = 0; 2642 break; 2643 case RSPQ_SOP: 2644 DBG(DBG_RX, ("get_packet: SOP m %p\n", m)); 2645 m->m_pkthdr.len = len; 2646 mh->mh_head = mh->mh_tail = m; 2647 ret = 0; 2648 break; 2649 case RSPQ_EOP: 2650 DBG(DBG_RX, ("get_packet: EOP m %p\n", m)); 2651 mh->mh_head->m_pkthdr.len += len; 2652 mh->mh_tail->m_next = m; 2653 mh->mh_tail = m; 2654 ret = 1; 2655 break; 2656 } 2657 if (++fl->cidx == fl->size) 2658 fl->cidx = 0; 2659 2660 return (ret); 2661} 2662 2663#else 2664 2665static int 
2666get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 2667 struct mbuf **m, struct rsp_desc *r) 2668{ 2669 2670 unsigned int len_cq = ntohl(r->len_cq); 2671 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0]; 2672 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; 2673 uint32_t len = G_RSPD_LEN(len_cq); 2674 uint32_t flags = ntohl(r->flags); 2675 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 2676 void *cl; 2677 int ret = 0; 2678 struct mbuf *m0; 2679#if 0 2680 if ((sd + 1 )->rxsd_cl) 2681 prefetch((sd + 1)->rxsd_cl); 2682 if ((sd + 2)->rxsd_cl) 2683 prefetch((sd + 2)->rxsd_cl); 2684#endif 2685 DPRINTF("rx cpu=%d\n", curcpu); 2686 fl->credits--; 2687 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2688 2689 if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) { 2690 if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) 2691 goto skip_recycle; 2692 cl = mtod(m0, void *); 2693 memcpy(cl, sd->data, len); 2694 recycle_rx_buf(adap, fl, fl->cidx); 2695 *m = m0; 2696 } else { 2697 skip_recycle: 2698 bus_dmamap_unload(fl->entry_tag, sd->map); 2699 cl = sd->rxsd_cl; 2700 *m = m0 = (struct mbuf *)cl; 2701 } 2702 2703 switch(sopeop) { 2704 case RSPQ_SOP_EOP: 2705 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m)); 2706 if (cl == sd->rxsd_cl) 2707 init_cluster_mbuf(cl, M_PKTHDR, fl->type, fl->zone); 2708 m0->m_len = m0->m_pkthdr.len = len; 2709 ret = 1; 2710 goto done; 2711 break; 2712 case RSPQ_NSOP_NEOP: 2713 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m)); 2714 panic("chaining unsupported"); 2715 ret = 0; 2716 break; 2717 case RSPQ_SOP: 2718 DBG(DBG_RX, ("get_packet: SOP m %p\n", m)); 2719 panic("chaining unsupported"); 2720 m_iovinit(m0); 2721 ret = 0; 2722 break; 2723 case RSPQ_EOP: 2724 DBG(DBG_RX, ("get_packet: EOP m %p\n", m)); 2725 panic("chaining unsupported"); 2726 ret = 1; 2727 break; 2728 } 2729 panic("append not supported"); 2730#if 0 2731 m_iovappend(m0, cl, fl->buf_size, len, sizeof(uint32_t), sd->rxsd_ref); 2732#endif 2733done: 2734 if (++fl->cidx == fl->size) 2735 fl->cidx = 0; 2736 2737 return (ret); 2738} 2739#endif 2740/** 2741 * handle_rsp_cntrl_info - handles control information in a response 2742 * @qs: the queue set corresponding to the response 2743 * @flags: the response control flags 2744 * 2745 * Handles the control information of an SGE response, such as GTS 2746 * indications and completion credits for the queue set's Tx queues. 2747 * HW coalesces credits, we don't do any extra SW coalescing. 
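 *
 * For example, if G_RSPD_TXQ0_CR(flags) extracts a credit count of 3, the
 * Ethernet Tx queue's 'processed' count below advances by 3; TXQ1 credits
 * are applied to the offload queue and TXQ2 credits to the control queue.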
2748 */ 2749static __inline void 2750handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 2751{ 2752 unsigned int credits; 2753 2754#if USE_GTS 2755 if (flags & F_RSPD_TXQ0_GTS) 2756 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 2757#endif 2758 credits = G_RSPD_TXQ0_CR(flags); 2759 if (credits) 2760 qs->txq[TXQ_ETH].processed += credits; 2761 2762 credits = G_RSPD_TXQ2_CR(flags); 2763 if (credits) 2764 qs->txq[TXQ_CTRL].processed += credits; 2765 2766# if USE_GTS 2767 if (flags & F_RSPD_TXQ1_GTS) 2768 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 2769# endif 2770 credits = G_RSPD_TXQ1_CR(flags); 2771 if (credits) 2772 qs->txq[TXQ_OFLD].processed += credits; 2773 2774} 2775 2776static void 2777check_ring_db(adapter_t *adap, struct sge_qset *qs, 2778 unsigned int sleeping) 2779{ 2780 ; 2781} 2782 2783/** 2784 * process_responses - process responses from an SGE response queue 2785 * @adap: the adapter 2786 * @qs: the queue set to which the response queue belongs 2787 * @budget: how many responses can be processed in this round 2788 * 2789 * Process responses from an SGE response queue up to the supplied budget. 2790 * Responses include received packets as well as credits and other events 2791 * for the queues that belong to the response queue's queue set. 2792 * A negative budget is effectively unlimited. 2793 * 2794 * Additionally choose the interrupt holdoff time for the next interrupt 2795 * on this queue. If the system is under memory shortage use a fairly 2796 * long delay to help recovery. 2797 */ 2798int 2799process_responses(adapter_t *adap, struct sge_qset *qs, int budget) 2800{ 2801 struct sge_rspq *rspq = &qs->rspq; 2802 struct rsp_desc *r = &rspq->desc[rspq->cidx]; 2803 int budget_left = budget; 2804 unsigned int sleeping = 0; 2805#ifdef LRO_SUPPORTED 2806 int lro_enabled = qs->lro.enabled; 2807 int skip_lro; 2808 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl; 2809#endif 2810 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE]; 2811 int ngathered = 0; 2812#ifdef DEBUG 2813 static int last_holdoff = 0; 2814 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) { 2815 printf("next_holdoff=%d\n", rspq->holdoff_tmr); 2816 last_holdoff = rspq->holdoff_tmr; 2817 } 2818#endif 2819 rspq->next_holdoff = rspq->holdoff_tmr; 2820 2821 while (__predict_true(budget_left && is_new_response(r, rspq))) { 2822 int eth, eop = 0, ethpad = 0; 2823 uint32_t flags = ntohl(r->flags); 2824 uint32_t rss_csum = *(const uint32_t *)r; 2825 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val); 2826 2827 eth = (r->rss_hdr.opcode == CPL_RX_PKT); 2828 2829 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { 2830 struct mbuf *m; 2831 2832 if (cxgb_debug) 2833 printf("async notification\n"); 2834 2835 if (rspq->rspq_mh.mh_head == NULL) { 2836 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA); 2837 m = rspq->rspq_mh.mh_head; 2838 } else { 2839 m = m_gethdr(M_DONTWAIT, MT_DATA); 2840 } 2841 2842 /* XXX m is lost here if rspq->rspq_mbuf is not NULL */ 2843 2844 if (m == NULL) 2845 goto no_mem; 2846 2847 memcpy(mtod(m, char *), r, AN_PKT_SIZE); 2848 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE; 2849 *mtod(m, char *) = CPL_ASYNC_NOTIF; 2850 rss_csum = htonl(CPL_ASYNC_NOTIF << 24); 2851 eop = 1; 2852 rspq->async_notif++; 2853 goto skip; 2854 } else if (flags & F_RSPD_IMM_DATA_VALID) { 2855 struct mbuf *m = NULL; 2856 2857 DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n", 2858 r->rss_hdr.opcode, rspq->cidx); 2859 if (rspq->rspq_mh.mh_head == NULL) 2860 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA); 2861 else 
2862 m = m_gethdr(M_DONTWAIT, MT_DATA); 2863 2864 if (rspq->rspq_mh.mh_head == NULL && m == NULL) { 2865 no_mem: 2866 rspq->next_holdoff = NOMEM_INTR_DELAY; 2867 budget_left--; 2868 break; 2869 } 2870 get_imm_packet(adap, r, rspq->rspq_mh.mh_head); 2871 eop = 1; 2872 rspq->imm_data++; 2873 } else if (r->len_cq) { 2874 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0; 2875 2876#ifdef DISABLE_MBUF_IOVEC 2877 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r); 2878#else 2879 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mbuf, r); 2880#endif 2881#ifdef IFNET_MULTIQUEUE 2882 rspq->rspq_mh.mh_head->m_flags |= M_FLOWID; 2883 rspq->rspq_mh.mh_head->m_pkthdr.flowid = rss_hash; 2884#endif 2885 ethpad = 2; 2886 } else { 2887 DPRINTF("pure response\n"); 2888 rspq->pure_rsps++; 2889 } 2890 skip: 2891 if (flags & RSPD_CTRL_MASK) { 2892 sleeping |= flags & RSPD_GTS_MASK; 2893 handle_rsp_cntrl_info(qs, flags); 2894 } 2895 2896 r++; 2897 if (__predict_false(++rspq->cidx == rspq->size)) { 2898 rspq->cidx = 0; 2899 rspq->gen ^= 1; 2900 r = rspq->desc; 2901 } 2902 prefetch(r); 2903 if (++rspq->credits >= (rspq->size / 4)) { 2904 refill_rspq(adap, rspq, rspq->credits); 2905 rspq->credits = 0; 2906 } 2907 DPRINTF("eth=%d eop=%d flags=0x%x\n", eth, eop, flags); 2908 2909 if (!eth && eop) { 2910 rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum; 2911 /* 2912 * XXX size mismatch 2913 */ 2914 m_set_priority(rspq->rspq_mh.mh_head, rss_hash); 2915 2916 2917 ngathered = rx_offload(&adap->tdev, rspq, 2918 rspq->rspq_mh.mh_head, offload_mbufs, ngathered); 2919 rspq->rspq_mh.mh_head = NULL; 2920 DPRINTF("received offload packet\n"); 2921 2922 } else if (eth && eop) { 2923 struct mbuf *m = rspq->rspq_mh.mh_head; 2924 prefetch(mtod(m, uint8_t *)); 2925 prefetch(mtod(m, uint8_t *) + L1_CACHE_BYTES); 2926 2927 t3_rx_eth(adap, rspq, m, ethpad); 2928 2929#ifdef LRO_SUPPORTED 2930 /* 2931 * The T304 sends incoming packets on any qset. If LRO 2932 * is also enabled, we could end up sending packet up 2933 * lro_ctrl->ifp's input. That is incorrect. 2934 * 2935 * The mbuf's rcvif was derived from the cpl header and 2936 * is accurate. Skip LRO and just use that. 2937 */ 2938 skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif); 2939 2940 if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro && 2941 (tcp_lro_rx(lro_ctrl, m, 0) == 0)) { 2942 /* successfully queue'd for LRO */ 2943 } else 2944#endif 2945 { 2946 /* 2947 * LRO not enabled, packet unsuitable for LRO, 2948 * or unable to queue. Pass it up right now in 2949 * either case. 
2950 */ 2951 struct ifnet *ifp = m->m_pkthdr.rcvif; 2952 (*ifp->if_input)(ifp, m); 2953 } 2954 DPRINTF("received tunnel packet\n"); 2955 rspq->rspq_mh.mh_head = NULL; 2956 2957 } 2958 __refill_fl_lt(adap, &qs->fl[0], 32); 2959 __refill_fl_lt(adap, &qs->fl[1], 32); 2960 --budget_left; 2961 } 2962 2963 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered); 2964 2965#ifdef LRO_SUPPORTED 2966 /* Flush LRO */ 2967 while (!SLIST_EMPTY(&lro_ctrl->lro_active)) { 2968 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active); 2969 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next); 2970 tcp_lro_flush(lro_ctrl, queued); 2971 } 2972#endif 2973 2974 if (sleeping) 2975 check_ring_db(adap, qs, sleeping); 2976 2977 smp_mb(); /* commit Tx queue processed updates */ 2978 if (__predict_false(qs->txq_stopped > 1)) { 2979 printf("restarting tx on %p\n", qs); 2980 2981 restart_tx(qs); 2982 } 2983 2984 __refill_fl_lt(adap, &qs->fl[0], 512); 2985 __refill_fl_lt(adap, &qs->fl[1], 512); 2986 budget -= budget_left; 2987 return (budget); 2988} 2989 2990/* 2991 * A helper function that processes responses and issues GTS. 2992 */ 2993static __inline int 2994process_responses_gts(adapter_t *adap, struct sge_rspq *rq) 2995{ 2996 int work; 2997 static int last_holdoff = 0; 2998 2999 work = process_responses(adap, rspq_to_qset(rq), -1); 3000 3001 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { 3002 printf("next_holdoff=%d\n", rq->next_holdoff); 3003 last_holdoff = rq->next_holdoff; 3004 } 3005 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | 3006 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); 3007 3008 return (work); 3009} 3010 3011 3012/* 3013 * Interrupt handler for legacy INTx interrupts for T3B-based cards. 3014 * Handles data events from SGE response queues as well as error and other 3015 * async events as they all use the same interrupt pin. We use one SGE 3016 * response queue per port in this mode and protect all response queues with 3017 * queue 0's lock. 3018 */ 3019void 3020t3b_intr(void *data) 3021{ 3022 uint32_t i, map; 3023 adapter_t *adap = data; 3024 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3025 3026 t3_write_reg(adap, A_PL_CLI, 0); 3027 map = t3_read_reg(adap, A_SG_DATA_INTR); 3028 3029 if (!map) 3030 return; 3031 3032 if (__predict_false(map & F_ERRINTR)) 3033 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3034 3035 mtx_lock(&q0->lock); 3036 for_each_port(adap, i) 3037 if (map & (1 << i)) 3038 process_responses_gts(adap, &adap->sge.qs[i].rspq); 3039 mtx_unlock(&q0->lock); 3040} 3041 3042/* 3043 * The MSI interrupt handler. This needs to handle data events from SGE 3044 * response queues as well as error and other async events as they all use 3045 * the same MSI vector. We use one SGE response queue per port in this mode 3046 * and protect all response queues with queue 0's lock. 
3047 */ 3048void 3049t3_intr_msi(void *data) 3050{ 3051 adapter_t *adap = data; 3052 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3053 int i, new_packets = 0; 3054 3055 mtx_lock(&q0->lock); 3056 3057 for_each_port(adap, i) 3058 if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 3059 new_packets = 1; 3060 mtx_unlock(&q0->lock); 3061 if (new_packets == 0) 3062 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3063} 3064 3065void 3066t3_intr_msix(void *data) 3067{ 3068 struct sge_qset *qs = data; 3069 adapter_t *adap = qs->port->adapter; 3070 struct sge_rspq *rspq = &qs->rspq; 3071#ifndef IFNET_MULTIQUEUE 3072 mtx_lock(&rspq->lock); 3073#else 3074 if (mtx_trylock(&rspq->lock)) 3075#endif 3076 { 3077 3078 if (process_responses_gts(adap, rspq) == 0) 3079 rspq->unhandled_irqs++; 3080 mtx_unlock(&rspq->lock); 3081 } 3082} 3083 3084#define QDUMP_SBUF_SIZE 32 * 400 3085static int 3086t3_dump_rspq(SYSCTL_HANDLER_ARGS) 3087{ 3088 struct sge_rspq *rspq; 3089 struct sge_qset *qs; 3090 int i, err, dump_end, idx; 3091 static int multiplier = 1; 3092 struct sbuf *sb; 3093 struct rsp_desc *rspd; 3094 uint32_t data[4]; 3095 3096 rspq = arg1; 3097 qs = rspq_to_qset(rspq); 3098 if (rspq->rspq_dump_count == 0) 3099 return (0); 3100 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) { 3101 log(LOG_WARNING, 3102 "dump count is too large %d\n", rspq->rspq_dump_count); 3103 rspq->rspq_dump_count = 0; 3104 return (EINVAL); 3105 } 3106 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) { 3107 log(LOG_WARNING, 3108 "dump start of %d is greater than queue size\n", 3109 rspq->rspq_dump_start); 3110 rspq->rspq_dump_start = 0; 3111 return (EINVAL); 3112 } 3113 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data); 3114 if (err) 3115 return (err); 3116retry_sbufops: 3117 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 3118 3119 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n", 3120 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f), 3121 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1)); 3122 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n", 3123 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]); 3124 3125 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start, 3126 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1)); 3127 3128 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count; 3129 for (i = rspq->rspq_dump_start; i < dump_end; i++) { 3130 idx = i & (RSPQ_Q_SIZE-1); 3131 3132 rspd = &rspq->desc[idx]; 3133 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n", 3134 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx, 3135 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx)); 3136 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n", 3137 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags), 3138 be32toh(rspd->len_cq), rspd->intr_gen); 3139 } 3140 if (sbuf_overflowed(sb)) { 3141 sbuf_delete(sb); 3142 multiplier++; 3143 goto retry_sbufops; 3144 } 3145 sbuf_finish(sb); 3146 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3147 sbuf_delete(sb); 3148 return (err); 3149} 3150 3151static int 3152t3_dump_txq_eth(SYSCTL_HANDLER_ARGS) 3153{ 3154 struct sge_txq *txq; 3155 struct sge_qset *qs; 3156 int i, j, err, dump_end; 3157 static int multiplier = 1; 3158 struct sbuf *sb; 3159 struct tx_desc *txd; 3160 uint32_t *WR, wr_hi, wr_lo, gen; 3161 uint32_t data[4]; 3162 3163 txq = arg1; 3164 qs = txq_to_qset(txq, TXQ_ETH); 3165 if (txq->txq_dump_count == 0) { 3166 return (0); 3167 } 
3168 if (txq->txq_dump_count > TX_ETH_Q_SIZE) { 3169 log(LOG_WARNING, 3170 "dump count is too large %d\n", txq->txq_dump_count); 3171 txq->txq_dump_count = 1; 3172 return (EINVAL); 3173 } 3174 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) { 3175 log(LOG_WARNING, 3176 "dump start of %d is greater than queue size\n", 3177 txq->txq_dump_start); 3178 txq->txq_dump_start = 0; 3179 return (EINVAL); 3180 } 3181 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data); 3182 if (err) 3183 return (err); 3184 3185 3186retry_sbufops: 3187 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 3188 3189 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n", 3190 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 3191 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1)); 3192 sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n", 3193 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1), 3194 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1)); 3195 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3196 txq->txq_dump_start, 3197 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1)); 3198 3199 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3200 for (i = txq->txq_dump_start; i < dump_end; i++) { 3201 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)]; 3202 WR = (uint32_t *)txd->flit; 3203 wr_hi = ntohl(WR[0]); 3204 wr_lo = ntohl(WR[1]); 3205 gen = G_WR_GEN(wr_lo); 3206 3207 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3208 wr_hi, wr_lo, gen); 3209 for (j = 2; j < 30; j += 4) 3210 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3211 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3212 3213 } 3214 if (sbuf_overflowed(sb)) { 3215 sbuf_delete(sb); 3216 multiplier++; 3217 goto retry_sbufops; 3218 } 3219 sbuf_finish(sb); 3220 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3221 sbuf_delete(sb); 3222 return (err); 3223} 3224 3225static int 3226t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS) 3227{ 3228 struct sge_txq *txq; 3229 struct sge_qset *qs; 3230 int i, j, err, dump_end; 3231 static int multiplier = 1; 3232 struct sbuf *sb; 3233 struct tx_desc *txd; 3234 uint32_t *WR, wr_hi, wr_lo, gen; 3235 3236 txq = arg1; 3237 qs = txq_to_qset(txq, TXQ_CTRL); 3238 if (txq->txq_dump_count == 0) { 3239 return (0); 3240 } 3241 if (txq->txq_dump_count > 256) { 3242 log(LOG_WARNING, 3243 "dump count is too large %d\n", txq->txq_dump_count); 3244 txq->txq_dump_count = 1; 3245 return (EINVAL); 3246 } 3247 if (txq->txq_dump_start > 255) { 3248 log(LOG_WARNING, 3249 "dump start of %d is greater than queue size\n", 3250 txq->txq_dump_start); 3251 txq->txq_dump_start = 0; 3252 return (EINVAL); 3253 } 3254 3255retry_sbufops: 3256 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 3257 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3258 txq->txq_dump_start, 3259 (txq->txq_dump_start + txq->txq_dump_count) & 255); 3260 3261 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3262 for (i = txq->txq_dump_start; i < dump_end; i++) { 3263 txd = &txq->desc[i & (255)]; 3264 WR = (uint32_t *)txd->flit; 3265 wr_hi = ntohl(WR[0]); 3266 wr_lo = ntohl(WR[1]); 3267 gen = G_WR_GEN(wr_lo); 3268 3269 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3270 wr_hi, wr_lo, gen); 3271 for (j = 2; j < 30; j += 4) 3272 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3273 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3274 3275 } 3276 if (sbuf_overflowed(sb)) { 3277 sbuf_delete(sb); 3278 multiplier++; 3279 goto retry_sbufops; 3280 } 
3281 sbuf_finish(sb); 3282 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3283 sbuf_delete(sb); 3284 return (err); 3285} 3286 3287static int 3288t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS) 3289{ 3290 adapter_t *sc = arg1; 3291 struct qset_params *qsp = &sc->params.sge.qset[0]; 3292 int coalesce_usecs; 3293 struct sge_qset *qs; 3294 int i, j, err, nqsets = 0; 3295 struct mtx *lock; 3296 3297 if ((sc->flags & FULL_INIT_DONE) == 0) 3298 return (ENXIO); 3299 3300 coalesce_usecs = qsp->coalesce_usecs; 3301 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req); 3302 3303 if (err != 0) { 3304 return (err); 3305 } 3306 if (coalesce_usecs == qsp->coalesce_usecs) 3307 return (0); 3308 3309 for (i = 0; i < sc->params.nports; i++) 3310 for (j = 0; j < sc->port[i].nqsets; j++) 3311 nqsets++; 3312 3313 coalesce_usecs = max(1, coalesce_usecs); 3314 3315 for (i = 0; i < nqsets; i++) { 3316 qs = &sc->sge.qs[i]; 3317 qsp = &sc->params.sge.qset[i]; 3318 qsp->coalesce_usecs = coalesce_usecs; 3319 3320 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 3321 &sc->sge.qs[0].rspq.lock; 3322 3323 mtx_lock(lock); 3324 t3_update_qset_coalesce(qs, qsp); 3325 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | 3326 V_NEWTIMER(qs->rspq.holdoff_tmr)); 3327 mtx_unlock(lock); 3328 } 3329 3330 return (0); 3331} 3332 3333 3334void 3335t3_add_attach_sysctls(adapter_t *sc) 3336{ 3337 struct sysctl_ctx_list *ctx; 3338 struct sysctl_oid_list *children; 3339 3340 ctx = device_get_sysctl_ctx(sc->dev); 3341 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3342 3343 /* random information */ 3344 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3345 "firmware_version", 3346 CTLFLAG_RD, &sc->fw_version, 3347 0, "firmware version"); 3348 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3349 "hw_revision", 3350 CTLFLAG_RD, &sc->params.rev, 3351 0, "chip model"); 3352 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3353 "port_types", 3354 CTLFLAG_RD, &sc->port_types, 3355 0, "type of ports"); 3356 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3357 "enable_debug", 3358 CTLFLAG_RW, &cxgb_debug, 3359 0, "enable verbose debugging output"); 3360 SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tunq_coalesce", 3361 CTLFLAG_RD, &sc->tunq_coalesce, 3362 "#tunneled packets freed"); 3363 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3364 "txq_overrun", 3365 CTLFLAG_RD, &txq_fills, 3366 0, "#times txq overrun"); 3367 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3368 "pcpu_cache_enable", 3369 CTLFLAG_RW, &cxgb_pcpu_cache_enable, 3370 0, "#enable driver local pcpu caches"); 3371 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3372 "multiq_tx_enable", 3373 CTLFLAG_RW, &multiq_tx_enable, 3374 0, "enable transmit by multiple tx queues"); 3375 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3376 "coalesce_tx_enable", 3377 CTLFLAG_RW, &coalesce_tx_enable, 3378 0, "coalesce small packets in work requests - WARNING ALPHA"); 3379 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3380 "wakeup_tx_thread", 3381 CTLFLAG_RW, &wakeup_tx_thread, 3382 0, "wakeup tx thread if no transmitter running"); 3383 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3384 "cache_alloc", 3385 CTLFLAG_RD, &cxgb_cached_allocations, 3386 0, "#times a cluster was allocated from cache"); 3387 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3388 "cached", 3389 CTLFLAG_RD, &cxgb_cached, 3390 0, "#times a cluster was cached"); 3391 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3392 "ext_freed", 3393 CTLFLAG_RD, &cxgb_ext_freed, 3394 0, "#times a cluster was freed through ext_free"); 3395 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3396 "ext_inited", 3397 
CTLFLAG_RD, &cxgb_ext_inited, 3398 0, "#times a cluster was initialized for ext_free"); 3399 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3400 "mbufs_outstanding", 3401 CTLFLAG_RD, &cxgb_mbufs_outstanding, 3402 0, "#mbufs in flight in the driver"); 3403 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3404 "pack_outstanding", 3405 CTLFLAG_RD, &cxgb_pack_outstanding, 3406 0, "#packet in flight in the driver"); 3407} 3408 3409 3410static const char *rspq_name = "rspq"; 3411static const char *txq_names[] = 3412{ 3413 "txq_eth", 3414 "txq_ofld", 3415 "txq_ctrl" 3416}; 3417 3418static int 3419sysctl_handle_macstat(SYSCTL_HANDLER_ARGS) 3420{ 3421 struct port_info *p = arg1; 3422 uint64_t *parg; 3423 3424 if (!p) 3425 return (EINVAL); 3426 3427 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2); 3428 3429 PORT_LOCK(p); 3430 t3_mac_update_stats(&p->mac); 3431 PORT_UNLOCK(p); 3432 3433 return (sysctl_handle_quad(oidp, parg, 0, req)); 3434} 3435 3436void 3437t3_add_configured_sysctls(adapter_t *sc) 3438{ 3439 struct sysctl_ctx_list *ctx; 3440 struct sysctl_oid_list *children; 3441 int i, j; 3442 3443 ctx = device_get_sysctl_ctx(sc->dev); 3444 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3445 3446 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3447 "intr_coal", 3448 CTLTYPE_INT|CTLFLAG_RW, sc, 3449 0, t3_set_coalesce_usecs, 3450 "I", "interrupt coalescing timer (us)"); 3451 3452 for (i = 0; i < sc->params.nports; i++) { 3453 struct port_info *pi = &sc->port[i]; 3454 struct sysctl_oid *poid; 3455 struct sysctl_oid_list *poidlist; 3456 struct mac_stats *mstats = &pi->mac.stats; 3457 3458 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i); 3459 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 3460 pi->namebuf, CTLFLAG_RD, NULL, "port statistics"); 3461 poidlist = SYSCTL_CHILDREN(poid); 3462 SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO, 3463 "nqsets", CTLFLAG_RD, &pi->nqsets, 3464 0, "#queue sets"); 3465 3466 for (j = 0; j < pi->nqsets; j++) { 3467 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j]; 3468 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, 3469 *ctrlqpoid, *lropoid; 3470 struct sysctl_oid_list *qspoidlist, *rspqpoidlist, 3471 *txqpoidlist, *ctrlqpoidlist, 3472 *lropoidlist; 3473 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 3474 3475 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j); 3476 3477 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 3478 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics"); 3479 qspoidlist = SYSCTL_CHILDREN(qspoid); 3480 3481 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty", 3482 CTLFLAG_RD, &qs->fl[0].empty, 0, 3483 "freelist #0 empty"); 3484 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty", 3485 CTLFLAG_RD, &qs->fl[1].empty, 0, 3486 "freelist #1 empty"); 3487 3488 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3489 rspq_name, CTLFLAG_RD, NULL, "rspq statistics"); 3490 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid); 3491 3492 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3493 txq_names[0], CTLFLAG_RD, NULL, "txq statistics"); 3494 txqpoidlist = SYSCTL_CHILDREN(txqpoid); 3495 3496 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3497 txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics"); 3498 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid); 3499 3500 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3501 "lro_stats", CTLFLAG_RD, NULL, "LRO statistics"); 3502 lropoidlist = SYSCTL_CHILDREN(lropoid); 3503 3504 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size", 3505 CTLFLAG_RD, &qs->rspq.size, 3506 0, "#entries in response queue"); 3507 SYSCTL_ADD_UINT(ctx, 
rspqpoidlist, OID_AUTO, "cidx", 3508 CTLFLAG_RD, &qs->rspq.cidx, 3509 0, "consumer index"); 3510 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits", 3511 CTLFLAG_RD, &qs->rspq.credits, 3512 0, "#credits"); 3513 SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr", 3514 CTLFLAG_RD, &qs->rspq.phys_addr, 3515 "physical_address_of the queue"); 3516 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start", 3517 CTLFLAG_RW, &qs->rspq.rspq_dump_start, 3518 0, "start rspq dump entry"); 3519 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count", 3520 CTLFLAG_RW, &qs->rspq.rspq_dump_count, 3521 0, "#rspq entries to dump"); 3522 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump", 3523 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq, 3524 0, t3_dump_rspq, "A", "dump of the response queue"); 3525 3526 3527 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped", 3528 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops, 3529 0, "#tunneled packets dropped"); 3530 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen", 3531 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen, 3532 0, "#tunneled packets waiting to be sent"); 3533#if 0 3534 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx", 3535 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod, 3536 0, "#tunneled packets queue producer index"); 3537 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx", 3538 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons, 3539 0, "#tunneled packets queue consumer index"); 3540#endif 3541 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed", 3542 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed, 3543 0, "#tunneled packets processed by the card"); 3544 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned", 3545 CTLFLAG_RD, &txq->cleaned, 3546 0, "#tunneled packets cleaned"); 3547 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use", 3548 CTLFLAG_RD, &txq->in_use, 3549 0, "#tunneled packet slots in use"); 3550 SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees", 3551 CTLFLAG_RD, &txq->txq_frees, 3552 "#tunneled packets freed"); 3553 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped", 3554 CTLFLAG_RD, &txq->txq_skipped, 3555 0, "#tunneled packet descriptors skipped"); 3556 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "coalesced", 3557 CTLFLAG_RD, &txq->txq_coalesced, 3558 0, "#tunneled packets coalesced"); 3559 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued", 3560 CTLFLAG_RD, &txq->txq_enqueued, 3561 0, "#tunneled packets enqueued to hardware"); 3562 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags", 3563 CTLFLAG_RD, &qs->txq_stopped, 3564 0, "tx queues stopped"); 3565 SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr", 3566 CTLFLAG_RD, &txq->phys_addr, 3567 "physical_address_of the queue"); 3568 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen", 3569 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen, 3570 0, "txq generation"); 3571 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx", 3572 CTLFLAG_RD, &txq->cidx, 3573 0, "hardware queue cidx"); 3574 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx", 3575 CTLFLAG_RD, &txq->pidx, 3576 0, "hardware queue pidx"); 3577 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start", 3578 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start, 3579 0, "txq start idx for dump"); 3580 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count", 3581 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count, 3582 0, "txq #entries to dump"); 3583 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump", 3584 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH], 3585 0, t3_dump_txq_eth, "A", 
"dump of the transmit queue"); 3586 3587 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start", 3588 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start, 3589 0, "ctrlq start idx for dump"); 3590 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count", 3591 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count, 3592 0, "ctrl #entries to dump"); 3593 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump", 3594 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL], 3595 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue"); 3596 3597#ifdef LRO_SUPPORTED 3598 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued", 3599 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL); 3600 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed", 3601 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL); 3602 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum", 3603 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL); 3604 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt", 3605 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL); 3606#endif 3607 } 3608 3609 /* Now add a node for mac stats. */ 3610 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats", 3611 CTLFLAG_RD, NULL, "MAC statistics"); 3612 poidlist = SYSCTL_CHILDREN(poid); 3613 3614 /* 3615 * We (ab)use the length argument (arg2) to pass on the offset 3616 * of the data that we are interested in. This is only required 3617 * for the quad counters that are updated from the hardware (we 3618 * make sure that we return the latest value). 3619 * sysctl_handle_macstat first updates *all* the counters from 3620 * the hardware, and then returns the latest value of the 3621 * requested counter. Best would be to update only the 3622 * requested counter from hardware, but t3_mac_update_stats() 3623 * hides all the register details and we don't want to dive into 3624 * all that here. 
3625 */ 3626#define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \ 3627 (CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \ 3628 sysctl_handle_macstat, "QU", 0) 3629 CXGB_SYSCTL_ADD_QUAD(tx_octets); 3630 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad); 3631 CXGB_SYSCTL_ADD_QUAD(tx_frames); 3632 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames); 3633 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames); 3634 CXGB_SYSCTL_ADD_QUAD(tx_pause); 3635 CXGB_SYSCTL_ADD_QUAD(tx_deferred); 3636 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions); 3637 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions); 3638 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions); 3639 CXGB_SYSCTL_ADD_QUAD(tx_underrun); 3640 CXGB_SYSCTL_ADD_QUAD(tx_len_errs); 3641 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs); 3642 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral); 3643 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs); 3644 CXGB_SYSCTL_ADD_QUAD(tx_frames_64); 3645 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127); 3646 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255); 3647 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511); 3648 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023); 3649 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518); 3650 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max); 3651 CXGB_SYSCTL_ADD_QUAD(rx_octets); 3652 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad); 3653 CXGB_SYSCTL_ADD_QUAD(rx_frames); 3654 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames); 3655 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames); 3656 CXGB_SYSCTL_ADD_QUAD(rx_pause); 3657 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs); 3658 CXGB_SYSCTL_ADD_QUAD(rx_align_errs); 3659 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs); 3660 CXGB_SYSCTL_ADD_QUAD(rx_data_errs); 3661 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs); 3662 CXGB_SYSCTL_ADD_QUAD(rx_runt); 3663 CXGB_SYSCTL_ADD_QUAD(rx_jabber); 3664 CXGB_SYSCTL_ADD_QUAD(rx_short); 3665 CXGB_SYSCTL_ADD_QUAD(rx_too_long); 3666 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs); 3667 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops); 3668 CXGB_SYSCTL_ADD_QUAD(rx_frames_64); 3669 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127); 3670 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255); 3671 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511); 3672 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023); 3673 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518); 3674 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max); 3675#undef CXGB_SYSCTL_ADD_QUAD 3676 3677#define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \ 3678 CTLFLAG_RD, &mstats->a, 0) 3679 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err); 3680 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err); 3681 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun); 3682 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl); 3683 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss); 3684 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err); 3685 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change); 3686 CXGB_SYSCTL_ADD_ULONG(num_toggled); 3687 CXGB_SYSCTL_ADD_ULONG(num_resets); 3688 CXGB_SYSCTL_ADD_ULONG(link_faults); 3689#undef CXGB_SYSCTL_ADD_ULONG 3690 } 3691} 3692 3693/** 3694 * t3_get_desc - dump an SGE descriptor for debugging purposes 3695 * @qs: the queue set 3696 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx) 3697 * @idx: the descriptor index in the queue 3698 * @data: where to dump the descriptor contents 3699 * 3700 * Dumps the contents of a HW descriptor of an SGE queue. Returns the 3701 * size of the descriptor. 
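 *
 * For example, qnum 0..2 copy out a struct tx_desc from the corresponding Tx
 * queue, qnum 3 a struct rsp_desc from the response queue, and qnum 4 or 5 a
 * struct rx_desc from free list 0 or 1.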
3702 */ 3703int 3704t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, 3705 unsigned char *data) 3706{ 3707 if (qnum >= 6) 3708 return (EINVAL); 3709 3710 if (qnum < 3) { 3711 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size) 3712 return (EINVAL); 3713 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc)); 3714 return sizeof(struct tx_desc); 3715 } 3716 3717 if (qnum == 3) { 3718 if (!qs->rspq.desc || idx >= qs->rspq.size) 3719 return (EINVAL); 3720 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc)); 3721 return sizeof(struct rsp_desc); 3722 } 3723 3724 qnum -= 4; 3725 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size) 3726 return (EINVAL); 3727 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc)); 3728 return sizeof(struct rx_desc); 3729} 3730
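
/*
 * Illustrative usage sketch for t3_get_desc() (editor's example, not part of
 * the driver): copy response descriptor 0 of queue set 0 into a local buffer.
 * "sc" is assumed to be a fully initialized adapter softc.
 *
 *	unsigned char buf[sizeof(struct rsp_desc)];
 *	int len;
 *
 *	len = t3_get_desc(&sc->sge.qs[0], 3, 0, buf);
 *	if (len == sizeof(struct rsp_desc))
 *		log(LOG_DEBUG, "copied %d descriptor bytes\n", len);
 */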