cxgb_sge.c revision 175504
/**************************************************************************

Copyright (c) 2007, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
#define DEBUG_BUFRING


#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_sge.c 175504 2008-01-19 22:47:43Z kmacy $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/rman.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/systm.h>
#include <sys/syslog.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#ifdef CONFIG_DEFINED
#include <cxgb_include.h>
#include <sys/mvec.h>
#else
#include <dev/cxgb/cxgb_include.h>
#include <dev/cxgb/sys/mvec.h>
#endif

int	txq_fills = 0;
static int recycle_enable = 1;
extern int cxgb_txq_buf_ring_size;
int cxgb_cached_allocations;
int cxgb_cached;
int cxgb_ext_freed;
extern int cxgb_use_16k_clusters;
extern int cxgb_pcpu_cache_enable;


#define USE_GTS 0

#define SGE_RX_SM_BUF_SIZE	1536
#define SGE_RX_DROP_THRES	16
#define SGE_RX_COPY_THRES	128
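/*
 * These thresholds govern the receive path (beyond this excerpt): packets
 * no larger than SGE_RX_COPY_THRES are copied into a fresh small buffer so
 * the original cluster can be recycled, and SGE_RX_DROP_THRES is the
 * free-list low-water mark below which incoming packets are dropped
 * rather than exhausting the last buffers.
 */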
/*
 * Period of the Tx buffer reclaim timer.  This timer does not need to run
 * frequently as Tx buffers are usually reclaimed by new Tx packets.
 */
#define TX_RECLAIM_PERIOD	(hz >> 1)

/*
 * Values for sge_txq.flags
 */
enum {
	TXQ_RUNNING	= 1 << 0,  /* fetch engine is running */
	TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
};

struct tx_desc {
	uint64_t	flit[TX_DESC_FLITS];
} __packed;

struct rx_desc {
	uint32_t	addr_lo;
	uint32_t	len_gen;
	uint32_t	gen2;
	uint32_t	addr_hi;
} __packed;

struct rsp_desc {               /* response queue descriptor */
	struct rss_header	rss_hdr;
	uint32_t		flags;
	uint32_t		len_cq;
	uint8_t			imm_data[47];
	uint8_t			intr_gen;
} __packed;

#define RX_SW_DESC_MAP_CREATED	(1 << 0)
#define TX_SW_DESC_MAP_CREATED	(1 << 1)
#define RX_SW_DESC_INUSE	(1 << 3)
#define TX_SW_DESC_MAPPED	(1 << 4)

#define RSPQ_NSOP_NEOP		G_RSPD_SOP_EOP(0)
#define RSPQ_EOP		G_RSPD_SOP_EOP(F_RSPD_EOP)
#define RSPQ_SOP		G_RSPD_SOP_EOP(F_RSPD_SOP)
#define RSPQ_SOP_EOP		G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)

struct tx_sw_desc {                /* SW state per Tx descriptor */
	struct mbuf_iovec mi;
	bus_dmamap_t	map;
	int		flags;
};

struct rx_sw_desc {                /* SW state per Rx descriptor */
	caddr_t		rxsd_cl;
	caddr_t		data;
	bus_dmamap_t	map;
	int		flags;
};

struct txq_state {
	unsigned int compl;
	unsigned int gen;
	unsigned int pidx;
};

struct refill_fl_cb_arg {
	int               error;
	bus_dma_segment_t seg;
	int               nseg;
};

/*
 * Maps a number of flits to the number of Tx descriptors that can hold them.
 * The formula is
 *
 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
 *
 * HW allows up to 4 descriptors to be combined into a WR.
 */
static uint8_t flit_desc_map[] = {
	0,
#if SGE_NUM_GENBITS == 1
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};


static int lro_default = 0;
int cxgb_debug = 0;

static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static void sge_txq_reclaim_handler(void *arg, int ncount);

/**
 *	reclaim_completed_tx - reclaims completed Tx descriptors
 *	@q: the Tx queue to reclaim completed descriptors from
 *	@reclaim_min: the minimum number of reclaimable descriptors required
 *
 *	Reclaims Tx descriptors that the SGE has indicated it has processed,
 *	and frees the associated buffers if possible.  Called with the Tx
 *	queue's lock held.
 */
static __inline int
reclaim_completed_tx_(struct sge_txq *q, int reclaim_min)
{
	int reclaim = desc_reclaimable(q);

	if (reclaim < reclaim_min)
		return (0);

	mtx_assert(&q->lock, MA_OWNED);
	if (reclaim > 0) {
		t3_free_tx_desc(q, reclaim);
		q->cleaned += reclaim;
		q->in_use -= reclaim;
	}
	return (reclaim);
}
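/*
 * The reclaim_min argument batches reclamation: callers in this file pass
 * 16, so the walk over completed descriptors is skipped until at least 16
 * of them can be freed at once, amortizing the cost of the scan.
 */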
/**
 *	should_restart_tx - are there enough resources to restart a Tx queue?
 *	@q: the Tx queue
 *
 *	Checks if there are enough descriptors to restart a suspended Tx queue.
 */
static __inline int
should_restart_tx(const struct sge_txq *q)
{
	unsigned int r = q->processed - q->cleaned;

	return q->in_use - r < (q->size >> 1);
}

/**
 *	t3_sge_init - initialize SGE
 *	@adap: the adapter
 *	@p: the SGE parameters
 *
 *	Performs SGE initialization needed every time after a chip reset.
 *	We do not initialize any of the queue sets here, instead the driver
 *	top-level must request those individually.  We also do not enable DMA
 *	here, that should be done after the queues have been set up.
 */
void
t3_sge_init(adapter_t *adap, struct sge_params *p)
{
	u_int ctrl, ups;

	ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */

	ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
	       F_CQCRDTCTRL |
	       V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
	       V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
#if SGE_NUM_GENBITS == 1
	ctrl |= F_EGRGENCTRL;
#endif
	if (adap->params.rev > 0) {
		if (!(adap->flags & (USING_MSIX | USING_MSI)))
			ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
		ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL;
	}
	t3_write_reg(adap, A_SG_CONTROL, ctrl);
	t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
		     V_LORCQDRBTHRSH(512));
	t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
	t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
		     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
	t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000);
	t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
	t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
	t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
	t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
	t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
}


/**
 *	sgl_len - calculates the size of an SGL of the given capacity
 *	@n: the number of SGL entries
 *
 *	Calculates the number of flits needed for a scatter/gather list that
 *	can hold the given number of entries.
 */
static __inline unsigned int
sgl_len(unsigned int n)
{
	return ((3 * n) / 2 + (n & 1));
}
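/*
 * Worked example: an SGL entry is an 8-byte address plus a 4-byte length,
 * so two entries pack into three 8-byte flits.  Hence sgl_len(1) == 2,
 * sgl_len(2) == 3, sgl_len(3) == 5, and sgl_len(4) == 6.
 */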
/**
 *	get_imm_packet - return the next ingress packet buffer from a response
 *	@resp: the response descriptor containing the packet data
 *
 *	Return a packet containing the immediate data of the given response.
 */
#ifdef DISABLE_MBUF_IOVEC
static __inline int
get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct t3_mbuf_hdr *mh)
{
	struct mbuf *m = mh->m_head;

	memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
	m->m_pkthdr.len = m->m_len = IMMED_PKT_SIZE;
	return (0);
}

#else
static int
get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl, uint32_t flags)
{

	m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
	memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
	return (0);

}
#endif

static __inline u_int
flits_to_desc(u_int n)
{
	return (flit_desc_map[n]);
}

void
t3_sge_err_intr_handler(adapter_t *adapter)
{
	unsigned int v, status;


	status = t3_read_reg(adapter, A_SG_INT_CAUSE);

	if (status & F_RSPQCREDITOVERFOW)
		CH_ALERT(adapter, "SGE response queue credit overflow\n");

	if (status & F_RSPQDISABLED) {
		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);

		CH_ALERT(adapter,
			 "packet delivered to disabled response queue (0x%x)\n",
			 (v >> S_RSPQ0DISABLED) & 0xff);
	}

	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
	if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED))
		t3_fatal_err(adapter);
}

void
t3_sge_prep(adapter_t *adap, struct sge_params *p)
{
	int i;

	/* XXX Does ETHER_ALIGN need to be accounted for here? */
	p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data);

	for (i = 0; i < SGE_QSETS; ++i) {
		struct qset_params *q = p->qset + i;

		q->polling = adap->params.rev > 0;

		if (adap->params.nports > 2) {
			q->coalesce_nsecs = 50000;
		} else {
#ifdef INVARIANTS
			q->coalesce_nsecs = 10000;
#else
			q->coalesce_nsecs = 5000;
#endif
		}
		q->rspq_size = RSPQ_Q_SIZE;
		q->fl_size = FL_Q_SIZE;
		q->jumbo_size = JUMBO_Q_SIZE;
		q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
		q->txq_size[TXQ_OFLD] = 1024;
		q->txq_size[TXQ_CTRL] = 256;
		q->cong_thres = 0;
	}
}

int
t3_sge_alloc(adapter_t *sc)
{

	/* The parent tag. */
	if (bus_dma_tag_create( NULL,			/* parent */
				1, 0,			/* algnmnt, boundary */
				BUS_SPACE_MAXADDR,	/* lowaddr */
				BUS_SPACE_MAXADDR,	/* highaddr */
				NULL, NULL,		/* filter, filterarg */
				BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
				BUS_SPACE_UNRESTRICTED, /* nsegments */
				BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
				0,			/* flags */
				NULL, NULL,		/* lock, lockarg */
				&sc->parent_dmat)) {
		device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for normal sized RX frames
	 */
	if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
		MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
		device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for jumbo sized RX frames.
	 */
	if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
		BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
		device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for TX frames.
	 */
	if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
		TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
		NULL, NULL, &sc->tx_dmat)) {
		device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
		return (ENOMEM);
	}

	return (0);
}

int
t3_sge_free(struct adapter * sc)
{

	if (sc->tx_dmat != NULL)
		bus_dma_tag_destroy(sc->tx_dmat);

	if (sc->rx_jumbo_dmat != NULL)
		bus_dma_tag_destroy(sc->rx_jumbo_dmat);

	if (sc->rx_dmat != NULL)
		bus_dma_tag_destroy(sc->rx_dmat);

	if (sc->parent_dmat != NULL)
		bus_dma_tag_destroy(sc->parent_dmat);

	return (0);
}

void
t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
{

	qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U);
	qs->rspq.polling = 0 /* p->polling */;
}

#if !defined(__i386__) && !defined(__amd64__)
static void
refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct refill_fl_cb_arg *cb_arg = arg;

	cb_arg->error = error;
	cb_arg->seg = segs[0];
	cb_arg->nseg = nseg;

}
#endif
/**
 *	refill_fl - refill an SGE free-buffer list
 *	@sc: the controller softc
 *	@q: the free-list to refill
 *	@n: the number of new buffers to allocate
 *
 *	(Re)populate an SGE free-buffer list with up to @n new packet buffers.
 *	The caller must assure that @n does not exceed the queue's capacity.
 */
static void
refill_fl(adapter_t *sc, struct sge_fl *q, int n)
{
	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
	struct rx_desc *d = &q->desc[q->pidx];
	struct refill_fl_cb_arg cb_arg;
	caddr_t cl;
	int err;
	int header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);

	cb_arg.error = 0;
	while (n--) {
		/*
		 * We only allocate a cluster, mbuf allocation happens after rx
		 */
		if ((cl = cxgb_cache_get(q->zone)) == NULL) {
			log(LOG_WARNING, "Failed to allocate cluster\n");
			goto done;
		}

		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
				uma_zfree(q->zone, cl);
				goto done;
			}
			sd->flags |= RX_SW_DESC_MAP_CREATED;
		}
#if !defined(__i386__) && !defined(__amd64__)
		err = bus_dmamap_load(q->entry_tag, sd->map,
		    cl + header_size, q->buf_size,
		    refill_fl_cb, &cb_arg, 0);

		if (err != 0 || cb_arg.error) {
			log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
			/*
			 * XXX free cluster
			 */
			return;
		}
#else
		cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)(cl + header_size));
#endif
		sd->flags |= RX_SW_DESC_INUSE;
		sd->rxsd_cl = cl;
		sd->data = cl + header_size;
		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >> 32) & 0xffffffff);
		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
		d->gen2 = htobe32(V_FLD_GEN2(q->gen));

		d++;
		sd++;

		if (++q->pidx == q->size) {
			q->pidx = 0;
			q->gen ^= 1;
			sd = q->sdesc;
			d = q->desc;
		}
		q->credits++;
	}

done:
	t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}
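/*
 * Buffer layout note: the address handed to the hardware above is
 * cl + header_size, so the first header_size bytes of every cluster stay
 * software-owned.  That reserved space lets the receive path construct an
 * mbuf header (plus an external reference count word) in front of the
 * payload later without copying the data.
 */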
/**
 *	free_rx_bufs - free the Rx buffers on an SGE free list
 *	@sc: the controller softc
 *	@q: the SGE free list to clean up
 *
 *	Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
 *	this queue should be stopped before calling this function.
 */
static void
free_rx_bufs(adapter_t *sc, struct sge_fl *q)
{
	u_int cidx = q->cidx;

	while (q->credits--) {
		struct rx_sw_desc *d = &q->sdesc[cidx];

		if (d->flags & RX_SW_DESC_INUSE) {
			bus_dmamap_unload(q->entry_tag, d->map);
			bus_dmamap_destroy(q->entry_tag, d->map);
			uma_zfree(q->zone, d->rxsd_cl);
		}
		d->rxsd_cl = NULL;
		if (++cidx == q->size)
			cidx = 0;
	}
}

static __inline void
__refill_fl(adapter_t *adap, struct sge_fl *fl)
{
	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
}

static __inline void
__refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
{
	if ((fl->size - fl->credits) < max)
		refill_fl(adap, fl, min(max, fl->size - fl->credits));
}

void
refill_fl_service(adapter_t *adap, struct sge_fl *fl)
{
	__refill_fl_lt(adap, fl, 512);
}

/**
 *	recycle_rx_buf - recycle a receive buffer
 *	@adapter: the adapter
 *	@q: the SGE free list
 *	@idx: index of buffer to recycle
 *
 *	Recycles the specified buffer on the given free list by adding it at
 *	the next available slot on the list.
 */
static void
recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
{
	struct rx_desc *from = &q->desc[idx];
	struct rx_desc *to   = &q->desc[q->pidx];

	q->sdesc[q->pidx] = q->sdesc[idx];
	to->addr_lo = from->addr_lo;        // already big endian
	to->addr_hi = from->addr_hi;        // likewise
	wmb();
	to->len_gen = htobe32(V_FLD_GEN1(q->gen));
	to->gen2 = htobe32(V_FLD_GEN2(q->gen));
	q->credits++;

	if (++q->pidx == q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}

static void
alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
	uint32_t *addr;

	addr = arg;
	*addr = segs[0].ds_addr;
}

static int
alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
    bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
    bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
{
	size_t len = nelem * elem_size;
	void *s = NULL;
	void *p = NULL;
	int err;

	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
				      BUS_SPACE_MAXADDR_32BIT,
				      BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
				      len, 0, NULL, NULL, tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
		return (ENOMEM);
	}

	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
				    map)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
		return (ENOMEM);
	}

	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
	bzero(p, len);
	*(void **)desc = p;

	if (sw_size) {
		len = nelem * sw_size;
		s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
		*(void **)sdesc = s;
	}
	if (parent_entry_tag == NULL)
		return (0);

	if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
				      BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
				      NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
				      TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
				      NULL, NULL, entry_tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
		return (ENOMEM);
	}
	return (0);
}
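/*
 * alloc_ring bundles the three allocations every descriptor ring needs:
 * DMA-able memory for the hardware descriptors, an optional zeroed
 * software descriptor array of nelem * sw_size bytes, and an optional
 * per-entry DMA tag for mapping the packet buffers the ring will point at.
 */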
static void
sge_slow_intr_handler(void *arg, int ncount)
{
	adapter_t *sc = arg;

	t3_slow_intr_handler(sc);
}

/**
 *	sge_timer_cb - perform periodic maintenance of SGE queue sets
 *	@arg: the adapter
 *
 *	Runs periodically from a timer to perform maintenance of the SGE queue
 *	sets.  It performs the following tasks:
 *
 *	a) Cleans up any completed Tx descriptors that may still be pending.
 *	Normal descriptor cleanup happens when new packets are added to a Tx
 *	queue so this timer is relatively infrequent and does any cleanup only
 *	if the Tx queue has not seen any new packets in a while.  We make a
 *	best effort attempt to reclaim descriptors, in that we don't wait
 *	around if we cannot get a queue's lock (which most likely is because
 *	someone else is queueing new packets and so will also handle the clean
 *	up).  Since control queues use immediate data exclusively we don't
 *	bother cleaning them up here.
 *
 *	b) Replenishes Rx queues that have run out due to memory shortage.
 *	Normally new Rx buffers are added when existing ones are consumed but
 *	when out of memory a queue can become empty.  We try to add only a few
 *	buffers here, the queue will be replenished fully as these new buffers
 *	are used up if memory shortage has subsided.
 *
 *	c) Return coalesced response queue credits in case a response queue is
 *	starved.
 *
 *	d) Ring doorbells for T304 tunnel queues since we have seen doorbell
 *	fifo overflows and the FW doesn't implement any recovery scheme yet.
 */
static void
sge_timer_cb(void *arg)
{
	adapter_t *sc = arg;
#ifndef IFNET_MULTIQUEUE
	struct port_info *pi;
	struct sge_qset *qs;
	struct sge_txq  *txq;
	int i, j;
	int reclaim_ofl, refill_rx;

	for (i = 0; i < sc->params.nports; i++)
		for (j = 0; j < sc->port[i].nqsets; j++) {
			qs = &sc->sge.qs[i + j];
			txq = &qs->txq[0];
			reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
			refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
			    (qs->fl[1].credits < qs->fl[1].size));
			if (reclaim_ofl || refill_rx) {
				pi = &sc->port[i];
				taskqueue_enqueue(pi->tq, &pi->timer_reclaim_task);
				break;
			}
		}
#endif
	if (sc->params.nports > 2) {
		int i;

		for_each_port(sc, i) {
			struct port_info *pi = &sc->port[i];

			t3_write_reg(sc, A_SG_KDOORBELL,
				     F_SELEGRCNTX |
				     (FW_TUNNEL_SGEEC_START + pi->first_qset));
		}
	}
	if (sc->open_device_map != 0)
		callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
}

/*
 * This is meant to be a catch-all function to keep sge state private
 * to sge.c
 *
 */
int
t3_sge_init_adapter(adapter_t *sc)
{
	callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
	TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
	mi_init();
	cxgb_cache_init();
	return (0);
}

int
t3_sge_reset_adapter(adapter_t *sc)
{
	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
	return (0);
}

int
t3_sge_init_port(struct port_info *pi)
{
	TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
	return (0);
}

void
t3_sge_deinit_sw(adapter_t *sc)
{
	int i;

	callout_drain(&sc->sge_timer_ch);
	if (sc->tq)
		taskqueue_drain(sc->tq, &sc->slow_intr_task);
	for (i = 0; i < sc->params.nports; i++)
		if (sc->port[i].tq != NULL)
			taskqueue_drain(sc->port[i].tq,
			    &sc->port[i].timer_reclaim_task);

	mi_deinit();
}

/**
 *	refill_rspq - replenish an SGE response queue
 *	@adapter: the adapter
 *	@q: the response queue to replenish
 *	@credits: how many new responses to make available
 *
 *	Replenishes a response queue by making the supplied number of responses
 *	available to HW.
 */
static __inline void
refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
{

	/* mbufs are allocated on demand when a rspq entry is processed. */
	t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
		     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
}

static __inline void
sge_txq_reclaim_(struct sge_txq *txq, int force)
{

	if (desc_reclaimable(txq) < 16)
		return;
	if (mtx_trylock(&txq->lock) == 0)
		return;
	reclaim_completed_tx_(txq, 16);
	mtx_unlock(&txq->lock);

}

static void
sge_txq_reclaim_handler(void *arg, int ncount)
{
	struct sge_txq *q = arg;

	sge_txq_reclaim_(q, TRUE);
}



static void
sge_timer_reclaim(void *arg, int ncount)
{
	struct port_info *pi = arg;
	int i, nqsets = pi->nqsets;
	adapter_t *sc = pi->adapter;
	struct sge_qset *qs;
	struct sge_txq *txq;
	struct mtx *lock;

#ifdef IFNET_MULTIQUEUE
	panic("%s should not be called with multiqueue support\n", __FUNCTION__);
#endif
	for (i = 0; i < nqsets; i++) {
		qs = &sc->sge.qs[i];

		txq = &qs->txq[TXQ_OFLD];
		sge_txq_reclaim_(txq, FALSE);

		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
			    &sc->sge.qs[0].rspq.lock;

		if (mtx_trylock(lock)) {
			/* XXX currently assume that we are *NOT* polling */
			uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);

			if (qs->fl[0].credits < qs->fl[0].size - 16)
				__refill_fl(sc, &qs->fl[0]);
			if (qs->fl[1].credits < qs->fl[1].size - 16)
				__refill_fl(sc, &qs->fl[1]);

			if (status & (1 << qs->rspq.cntxt_id)) {
				if (qs->rspq.credits) {
					refill_rspq(sc, &qs->rspq, 1);
					qs->rspq.credits--;
					t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
					    1 << qs->rspq.cntxt_id);
				}
			}
			mtx_unlock(lock);
		}
	}
}

/**
 *	init_qset_cntxt - initialize an SGE queue set context info
 *	@qs: the queue set
 *	@id: the queue set id
 *
 *	Initializes the TIDs and context ids for the queues of a queue set.
 */
static void
init_qset_cntxt(struct sge_qset *qs, u_int id)
{

	qs->rspq.cntxt_id = id;
	qs->fl[0].cntxt_id = 2 * id;
	qs->fl[1].cntxt_id = 2 * id + 1;
	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;

	mbufq_init(&qs->txq[TXQ_ETH].sendq);
	mbufq_init(&qs->txq[TXQ_OFLD].sendq);
	mbufq_init(&qs->txq[TXQ_CTRL].sendq);
}


static void
txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
{
	txq->in_use += ndesc;
	/*
	 * XXX we don't handle stopping of queue
	 * presumably start handles this when we bump against the end
	 */
	txqs->gen = txq->gen;
	txq->unacked += ndesc;
	txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3);
	txq->unacked &= 7;
	txqs->pidx = txq->pidx;
	txq->pidx += ndesc;
#ifdef INVARIANTS
	if (((txqs->pidx > txq->cidx) &&
		(txq->pidx < txqs->pidx) &&
		(txq->pidx >= txq->cidx)) ||
	    ((txqs->pidx < txq->cidx) &&
		(txq->pidx >= txq->cidx)) ||
	    ((txqs->pidx < txq->cidx) &&
		(txq->cidx < txqs->pidx)))
		panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
		    txqs->pidx, txq->pidx, txq->cidx);
#endif
	if (txq->pidx >= txq->size) {
		txq->pidx -= txq->size;
		txq->gen ^= 1;
	}

}
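/*
 * Completion pacing note: txq->unacked accumulates issued descriptors and
 * the (unacked & 8) test above sets the WR completion bit roughly once
 * per eight descriptors, so the hardware reports Tx progress in batches
 * instead of per packet.
 */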
/**
 *	calc_tx_descs - calculate the number of Tx descriptors for a packet
 *	@m: the packet mbufs
 *	@nsegs: the number of segments
 *
 *	Returns the number of Tx descriptors needed for the given Ethernet
 *	packet.  Ethernet packets require addition of WR and CPL headers.
 */
static __inline unsigned int
calc_tx_descs(const struct mbuf *m, int nsegs)
{
	unsigned int flits;

	if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
		return 1;

	flits = sgl_len(nsegs) + 2;
#ifdef TSO_SUPPORTED
	if (m->m_pkthdr.csum_flags & CSUM_TSO)
		flits++;
#endif
	return flits_to_desc(flits);
}

static unsigned int
busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
    struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
{
	struct mbuf *m0;
	int err, pktlen, pass = 0;

retry:
	err = 0;
	m0 = *m;
	pktlen = m0->m_pkthdr.len;
#if defined(__i386__) || defined(__amd64__)
	if (busdma_map_sg_collapse(m, segs, nsegs) == 0) {
		goto done;
	} else
#endif
		err = bus_dmamap_load_mbuf_sg(txq->entry_tag, txsd->map, m0, segs, nsegs, 0);

	if (err == 0) {
		goto done;
	}
	if (err == EFBIG && pass == 0) {
		pass = 1;
		/* Too many segments, try to defrag */
		m0 = m_defrag(m0, M_DONTWAIT);
		if (m0 == NULL) {
			m_freem(*m);
			*m = NULL;
			return (ENOBUFS);
		}
		*m = m0;
		goto retry;
	} else if (err == ENOMEM) {
		return (err);
	}
	if (err) {
		if (cxgb_debug)
			printf("map failure err=%d pktlen=%d\n", err, pktlen);
		m_freem(m0);
		*m = NULL;
		return (err);
	}
done:
#if !defined(__i386__) && !defined(__amd64__)
	bus_dmamap_sync(txq->entry_tag, txsd->map, BUS_DMASYNC_PREWRITE);
#endif
	txsd->flags |= TX_SW_DESC_MAPPED;

	return (0);
}

/**
 *	make_sgl - populate a scatter/gather list for a packet
 *	@sgp: the SGL to populate
 *	@segs: the packet dma segments
 *	@nsegs: the number of segments
 *
 *	Generates a scatter/gather list for the buffers that make up a packet
 *	and returns the SGL size in 8-byte words.  The caller must size the SGL
 *	appropriately.
 */
static __inline void
make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
{
	int i, idx;

	for (idx = 0, i = 0; i < nsegs; i++) {
		/*
		 * firmware doesn't like empty segments
		 */
		if (segs[i].ds_len == 0)
			continue;
		if (i && idx == 0)
			++sgp;

		sgp->len[idx] = htobe32(segs[i].ds_len);
		sgp->addr[idx] = htobe64(segs[i].ds_addr);
		idx ^= 1;
	}

	if (idx) {
		sgp->len[idx] = 0;
		sgp->addr[idx] = 0;
	}
}
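/*
 * Packing note: each struct sg_ent holds two (length, address) pairs, so
 * the loop above toggles idx between 0 and 1 and advances sgp once per
 * pair; a trailing odd entry is terminated by zeroing the unused slot.
 */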
/**
 *	check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 *	@adap: the adapter
 *	@q: the Tx queue
 *
 *	Ring the doorbell if a Tx queue is asleep.  There is a natural race
 *	where the HW may go to sleep just after we checked; in that case the
 *	interrupt handler will detect the outstanding TX packet and ring the
 *	doorbell for us.
 *
 *	When GTS is disabled we unconditionally ring the doorbell.
 */
static __inline void
check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
{
#if USE_GTS
	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
		set_bit(TXQ_LAST_PKT_DB, &q->flags);
#ifdef T3_TRACE
		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
			  q->cntxt_id);
#endif
		t3_write_reg(adap, A_SG_KDOORBELL,
			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	}
#else
	wmb();            /* write descriptors before telling HW */
	t3_write_reg(adap, A_SG_KDOORBELL,
		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
#endif
}

static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
#endif
}
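/*
 * Generation-bit note: q->gen flips each time the producer index wraps,
 * and with SGE_NUM_GENBITS == 2 wr_gen2 mirrors that bit into the last
 * flit of the descriptor.  The hardware only treats a descriptor as valid
 * when its generation matches, so stale entries from the previous pass
 * around the ring are ignored.
 */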
/**
 *	write_wr_hdr_sgl - write a WR header and, optionally, SGL
 *	@ndesc: number of Tx descriptors spanned by the SGL
 *	@txd: first Tx descriptor to be written
 *	@txqs: txq state (generation and producer index)
 *	@txq: the SGE Tx queue
 *	@sgl: the SGL
 *	@flits: number of flits to the start of the SGL in the first descriptor
 *	@sgl_flits: the SGL size in flits
 *	@wr_hi: top 32 bits of WR header based on WR type (big endian)
 *	@wr_lo: low 32 bits of WR header based on WR type (big endian)
 *
 *	Write a work request header and an associated SGL.  If the SGL is
 *	small enough to fit into one Tx descriptor it has already been written
 *	and we just need to write the WR header.  Otherwise we distribute the
 *	SGL across the number of descriptors it spans.
 */
static void
write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
    const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
    unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
{

	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
	struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];

	if (__predict_true(ndesc == 1)) {
		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi;
		wmb();
		wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
		    V_WR_GEN(txqs->gen)) | wr_lo;
		/* XXX gen? */
		wr_gen2(txd, txqs->gen);

	} else {
		unsigned int ogen = txqs->gen;
		const uint64_t *fp = (const uint64_t *)sgl;
		struct work_request_hdr *wp = wrp;

		wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi;

		while (sgl_flits) {
			unsigned int avail = WR_FLITS - flits;

			if (avail > sgl_flits)
				avail = sgl_flits;
			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
			sgl_flits -= avail;
			ndesc--;
			if (!sgl_flits)
				break;

			fp += avail;
			txd++;
			txsd++;
			if (++txqs->pidx == txq->size) {
				txqs->pidx = 0;
				txqs->gen ^= 1;
				txd = txq->desc;
				txsd = txq->sdesc;
			}

			/*
			 * when the head of the mbuf chain
			 * is freed all clusters will be freed
			 * with it
			 */
			KASSERT(txsd->mi.mi_base == NULL,
			    ("overwriting valid entry mi_base==%p", txsd->mi.mi_base));
			wrp = (struct work_request_hdr *)txd;
			wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
			    V_WR_SGLSFLT(1)) | wr_hi;
			wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
				    sgl_flits + 1)) |
			    V_WR_GEN(txqs->gen)) | wr_lo;
			wr_gen2(txd, txqs->gen);
			flits = 1;
		}
		wrp->wr_hi |= htonl(F_WR_EOP);
		wmb();
		wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
		wr_gen2((struct tx_desc *)wp, ogen);
	}
}

/* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
#define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)

#ifdef VLAN_SUPPORTED
#define GET_VTAG(cntrl, m) \
do { \
	if ((m)->m_flags & M_VLANTAG)					            \
		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
} while (0)

#define GET_VTAG_MI(cntrl, mi) \
do { \
	if ((mi)->mi_flags & M_VLANTAG) \
		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((mi)->mi_ether_vtag); \
} while (0)
#else
#define GET_VTAG(cntrl, m)
#define GET_VTAG_MI(cntrl, m)
#endif

int
t3_encap(struct sge_qset *qs, struct mbuf **m, int count)
{
	adapter_t *sc;
	struct mbuf *m0;
	struct sge_txq *txq;
	struct txq_state txqs;
	struct port_info *pi;
	unsigned int ndesc, flits, cntrl, mlen;
	int err, nsegs, tso_info = 0;

	struct work_request_hdr *wrp;
	struct tx_sw_desc *txsd;
	struct sg_ent *sgp, *sgl;
	uint32_t wr_hi, wr_lo, sgl_flits;
	bus_dma_segment_t segs[TX_MAX_SEGS];

	struct tx_desc *txd;
	struct mbuf_vec *mv;
	struct mbuf_iovec *mi;

	DPRINTF("t3_encap cpu=%d ", curcpu);
	KASSERT(qs->idx == 0, ("invalid qs %d", qs->idx));

	mi = NULL;
	pi = qs->port;
	sc = pi->adapter;
	txq = &qs->txq[TXQ_ETH];
	txd = &txq->desc[txq->pidx];
	txsd = &txq->sdesc[txq->pidx];
	sgl = txq->txq_sgl;
	m0 = *m;

	DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset);
	DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan);
	if (cxgb_debug)
		printf("mi_base=%p cidx=%d pidx=%d\n\n", txsd->mi.mi_base, txq->cidx, txq->pidx);

	mtx_assert(&txq->lock, MA_OWNED);
	cntrl = V_TXPKT_INTF(pi->txpkt_intf);
/*
 * XXX need to add VLAN support for 6.x
 */
#ifdef VLAN_SUPPORTED
	if (m0->m_pkthdr.csum_flags & (CSUM_TSO))
		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
#endif
	KASSERT(txsd->mi.mi_base == NULL, ("overwriting valid entry mi_base==%p",
		txsd->mi.mi_base));
	if (count > 1) {
panic("count > 1 not support in CVS\n"); 1254 if ((err = busdma_map_sg_vec(m, &m0, segs, count))) 1255 return (err); 1256 nsegs = count; 1257 } else if ((err = busdma_map_sg_collapse(&m0, segs, &nsegs))) { 1258 if (cxgb_debug) 1259 printf("failed ... err=%d\n", err); 1260 return (err); 1261 } 1262 KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d count=%d", nsegs, count)); 1263 1264 if (!(m0->m_pkthdr.len <= PIO_LEN)) { 1265 mi_collapse_mbuf(&txsd->mi, m0); 1266 mi = &txsd->mi; 1267 } 1268 if (count > 1) { 1269 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd; 1270 int i, fidx; 1271 struct mbuf_iovec *batchmi; 1272 1273 mv = mtomv(m0); 1274 batchmi = mv->mv_vec; 1275 1276 wrp = (struct work_request_hdr *)txd; 1277 1278 flits = count*2 + 1; 1279 txq_prod(txq, 1, &txqs); 1280 1281 for (fidx = 1, i = 0; i < count; i++, batchmi++, fidx += 2) { 1282 struct cpl_tx_pkt_batch_entry *cbe = &cpl_batch->pkt_entry[i]; 1283 1284 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1285 GET_VTAG_MI(cntrl, batchmi); 1286 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1287 cbe->cntrl = htonl(cntrl); 1288 cbe->len = htonl(batchmi->mi_len | 0x80000000); 1289 cbe->addr = htobe64(segs[i].ds_addr); 1290 txd->flit[fidx] |= htobe64(1 << 24); 1291 } 1292 1293 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1294 V_WR_SGLSFLT(flits)) | htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1295 wmb(); 1296 wrp->wr_lo = htonl(V_WR_LEN(flits) | 1297 V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token)); 1298 /* XXX gen? */ 1299 wr_gen2(txd, txqs.gen); 1300 check_ring_tx_db(sc, txq); 1301 1302 return (0); 1303 } else if (tso_info) { 1304 int undersized, eth_type; 1305 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd; 1306 struct ip *ip; 1307 struct tcphdr *tcp; 1308 char *pkthdr, tmp[TCPPKTHDRSIZE]; 1309 struct mbuf_vec *mv; 1310 struct mbuf_iovec *tmpmi; 1311 1312 mv = mtomv(m0); 1313 tmpmi = mv->mv_vec; 1314 1315 txd->flit[2] = 0; 1316 GET_VTAG_MI(cntrl, mi); 1317 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); 1318 hdr->cntrl = htonl(cntrl); 1319 mlen = m0->m_pkthdr.len; 1320 hdr->len = htonl(mlen | 0x80000000); 1321 1322 DPRINTF("tso buf len=%d\n", mlen); 1323 undersized = (((tmpmi->mi_len < TCPPKTHDRSIZE) && 1324 (m0->m_flags & M_VLANTAG)) || 1325 (tmpmi->mi_len < TCPPKTHDRSIZE - ETHER_VLAN_ENCAP_LEN)); 1326 if (__predict_false(undersized)) { 1327 pkthdr = tmp; 1328 dump_mi(mi); 1329 panic("discontig packet - fixxorz"); 1330 } else 1331 pkthdr = m0->m_data; 1332 1333 if (__predict_false(m0->m_flags & M_VLANTAG)) { 1334 eth_type = CPL_ETH_II_VLAN; 1335 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN + 1336 ETHER_VLAN_ENCAP_LEN); 1337 } else { 1338 eth_type = CPL_ETH_II; 1339 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN); 1340 } 1341 tcp = (struct tcphdr *)((uint8_t *)ip + 1342 sizeof(*ip)); 1343 1344 tso_info |= V_LSO_ETH_TYPE(eth_type) | 1345 V_LSO_IPHDR_WORDS(ip->ip_hl) | 1346 V_LSO_TCPHDR_WORDS(tcp->th_off); 1347 hdr->lso_info = htonl(tso_info); 1348 flits = 3; 1349 } else { 1350 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd; 1351 1352 GET_VTAG(cntrl, m0); 1353 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1354 cpl->cntrl = htonl(cntrl); 1355 mlen = m0->m_pkthdr.len; 1356 cpl->len = htonl(mlen | 0x80000000); 1357 1358 if (mlen <= PIO_LEN) { 1359 txq_prod(txq, 1, &txqs); 1360 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); 1361 m_freem(m0); 1362 m0 = NULL; 1363 flits = (mlen + 7) / 8 + 2; 1364 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1365 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1366 F_WR_SOP | F_WR_EOP | 
/**
 *	write_imm - write a packet into a Tx descriptor as immediate data
 *	@d: the Tx descriptor to write
 *	@m: the packet
 *	@len: the length of packet data to write as immediate data
 *	@gen: the generation bit value to write
 *
 *	Writes a packet as immediate data into a Tx descriptor.  The packet
 *	contains a work request at its beginning.  We must write the packet
 *	carefully so the SGE doesn't read it accidentally before it's written
 *	in its entirety.
 */
static __inline void
write_imm(struct tx_desc *d, struct mbuf *m,
    unsigned int len, unsigned int gen)
{
	struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
	struct work_request_hdr *to = (struct work_request_hdr *)d;

	if (len > WR_LEN)
		panic("len too big %d\n", len);
	if (len < sizeof(*from))
		panic("len too small %d", len);

	memcpy(&to[1], &from[1], len - sizeof(*from));
	to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
					V_WR_BCNTLFLT(len & 7));
	wmb();
	to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
					V_WR_LEN((len + 7) / 8));
	wr_gen2(d, gen);

	/*
	 * This check is a hack; we should really fix the logic so
	 * that this can't happen
	 */
	if (m->m_type != MT_DONTFREE)
		m_freem(m);

}
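/*
 * Ordering note: the WR body and wr_hi are written first, then a write
 * barrier, and only then wr_lo carrying the generation bit.  Because the
 * SGE considers a descriptor valid only once the generation matches, it
 * can never observe a half-written work request.
 */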
/**
 *	check_desc_avail - check descriptor availability on a send queue
 *	@adap: the adapter
 *	@q: the TX queue
 *	@m: the packet needing the descriptors
 *	@ndesc: the number of Tx descriptors needed
 *	@qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 *
 *	Checks if the requested number of Tx descriptors is available on an
 *	SGE send queue.  If the queue is already suspended or not enough
 *	descriptors are available the packet is queued for later transmission.
 *	Must be called with the Tx queue locked.
 *
 *	Returns 0 if enough descriptors are available, 1 if there aren't
 *	enough descriptors and the packet has been queued, and 2 if the caller
 *	needs to retry because there weren't enough descriptors at the
 *	beginning of the call but some freed up in the meantime.
 */
static __inline int
check_desc_avail(adapter_t *adap, struct sge_txq *q,
    struct mbuf *m, unsigned int ndesc,
    unsigned int qid)
{
	/*
	 * XXX We currently only use this for checking the control queue
	 * the control queue is only used for binding qsets which happens
	 * at init time so we are guaranteed enough descriptors
	 */
	if (__predict_false(!mbufq_empty(&q->sendq))) {
addq_exit:	mbufq_tail(&q->sendq, m);
		return 1;
	}
	if (__predict_false(q->size - q->in_use < ndesc)) {

		struct sge_qset *qs = txq_to_qset(q, qid);

		printf("stopping q\n");

		setbit(&qs->txq_stopped, qid);
		smp_mb();

		if (should_restart_tx(q) &&
		    test_and_clear_bit(qid, &qs->txq_stopped))
			return 2;

		q->stops++;
		goto addq_exit;
	}
	return 0;
}


/**
 *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 *	@q: the SGE control Tx queue
 *
 *	This is a variant of reclaim_completed_tx() that is used for Tx queues
 *	that send only immediate data (presently just the control queues) and
 *	thus do not have any mbufs.
 */
static __inline void
reclaim_completed_tx_imm(struct sge_txq *q)
{
	unsigned int reclaim = q->processed - q->cleaned;

	mtx_assert(&q->lock, MA_OWNED);

	q->in_use -= reclaim;
	q->cleaned += reclaim;
}

static __inline int
immediate(const struct mbuf *m)
{
	return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN;
}

/**
 *	ctrl_xmit - send a packet through an SGE control Tx queue
 *	@adap: the adapter
 *	@q: the control queue
 *	@m: the packet
 *
 *	Send a packet through an SGE control Tx queue.  Packets sent through
 *	a control queue must fit entirely as immediate data in a single Tx
 *	descriptor and have no page fragments.
 */
static int
ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
{
	int ret;
	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);

	if (__predict_false(!immediate(m))) {
		m_freem(m);
		return 0;
	}

	wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
	wrp->wr_lo = htonl(V_WR_TID(q->token));

	mtx_lock(&q->lock);
again:	reclaim_completed_tx_imm(q);

	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
	if (__predict_false(ret)) {
		if (ret == 1) {
			mtx_unlock(&q->lock);
			log(LOG_ERR, "no desc available\n");

			return (ENOSPC);
		}
		goto again;
	}
	write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

	q->in_use++;
	if (++q->pidx >= q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	mtx_unlock(&q->lock);
	wmb();
	t3_write_reg(adap, A_SG_KDOORBELL,
		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	return (0);
}


/**
 *	restart_ctrlq - restart a suspended control queue
 *	@qs: the queue set containing the control queue
 *
 *	Resumes transmission on a suspended Tx control queue.
 */
static void
restart_ctrlq(void *data, int npending)
{
	struct mbuf *m;
	struct sge_qset *qs = (struct sge_qset *)data;
	struct sge_txq *q = &qs->txq[TXQ_CTRL];
	adapter_t *adap = qs->port->adapter;

	log(LOG_WARNING, "Restart_ctrlq in_use=%d\n", q->in_use);

	mtx_lock(&q->lock);
again:	reclaim_completed_tx_imm(q);

	while (q->in_use < q->size &&
	       (m = mbufq_dequeue(&q->sendq)) != NULL) {

		write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

		if (++q->pidx >= q->size) {
			q->pidx = 0;
			q->gen ^= 1;
		}
		q->in_use++;
	}
	if (!mbufq_empty(&q->sendq)) {
		setbit(&qs->txq_stopped, TXQ_CTRL);
		smp_mb();

		if (should_restart_tx(q) &&
		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
			goto again;
		q->stops++;
	}
	mtx_unlock(&q->lock);
	t3_write_reg(adap, A_SG_KDOORBELL,
		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}


/*
 * Send a management message through control queue 0
 */
int
t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
{
	return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
}


/**
 *	t3_free_qset - free the resources of an SGE queue set
 *	@sc: the controller owning the queue set
 *	@q: the queue set
 *
 *	Release the HW and SW resources associated with an SGE queue set, such
 *	as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 *	queue set must be quiesced prior to calling this.
 */
void
t3_free_qset(adapter_t *sc, struct sge_qset *q)
{
	int i;

	t3_free_tx_desc_all(&q->txq[TXQ_ETH]);

	for (i = 0; i < SGE_TXQ_PER_SET; i++)
		if (q->txq[i].txq_mr.br_ring != NULL) {
			free(q->txq[i].txq_mr.br_ring, M_DEVBUF);
			mtx_destroy(&q->txq[i].txq_mr.br_lock);
		}
	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
		if (q->fl[i].desc) {
			mtx_lock(&sc->sge.reg_lock);
			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
			mtx_unlock(&sc->sge.reg_lock);
			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
					q->fl[i].desc_map);
			bus_dma_tag_destroy(q->fl[i].desc_tag);
			bus_dma_tag_destroy(q->fl[i].entry_tag);
		}
		if (q->fl[i].sdesc) {
			free_rx_bufs(sc, &q->fl[i]);
			free(q->fl[i].sdesc, M_DEVBUF);
		}
	}

	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
		if (q->txq[i].desc) {
			mtx_lock(&sc->sge.reg_lock);
			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
			mtx_unlock(&sc->sge.reg_lock);
			bus_dmamap_unload(q->txq[i].desc_tag,
					q->txq[i].desc_map);
			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
					q->txq[i].desc_map);
			bus_dma_tag_destroy(q->txq[i].desc_tag);
			bus_dma_tag_destroy(q->txq[i].entry_tag);
			MTX_DESTROY(&q->txq[i].lock);
		}
		if (q->txq[i].sdesc) {
			free(q->txq[i].sdesc, M_DEVBUF);
		}
	}

	if (q->rspq.desc) {
		mtx_lock(&sc->sge.reg_lock);
		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
		mtx_unlock(&sc->sge.reg_lock);

		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
			        q->rspq.desc_map);
		bus_dma_tag_destroy(q->rspq.desc_tag);
		MTX_DESTROY(&q->rspq.lock);
	}

	bzero(q, sizeof(*q));
}

/**
 *	t3_free_sge_resources - free SGE resources
 *	@sc: the adapter softc
 *
 *	Frees resources used by the SGE queue sets.
 */
void
t3_free_sge_resources(adapter_t *sc)
{
	int i, nqsets;

#ifdef IFNET_MULTIQUEUE
	panic("%s should not be called when IFNET_MULTIQUEUE is defined", __FUNCTION__);
#endif
	for (nqsets = i = 0; i < (sc)->params.nports; i++)
		nqsets += sc->port[i].nqsets;

	for (i = 0; i < nqsets; ++i)
		t3_free_qset(sc, &sc->sge.qs[i]);
}

/**
 *	t3_sge_start - enable SGE
 *	@sc: the controller softc
 *
 *	Enables the SGE for DMAs.  This is the last step in starting packet
 *	transfers.
 */
void
t3_sge_start(adapter_t *sc)
{
	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
}

/**
 *	t3_sge_stop - disable SGE operation
 *	@sc: the adapter
 *
 *	Disables the DMA engine.  This can be called in emergencies (e.g.,
 *	from error interrupts) or from normal process context.  In the latter
 *	case it also disables any pending queue restart tasklets.  Note that
 *	if it is called in interrupt context it cannot disable the restart
 *	tasklets as it cannot wait, however the tasklets will have no effect
 *	since the doorbells are disabled and the driver will call this again
 *	later from process context, at which time the tasklets will be stopped
 *	if they are still running.
 */
void
t3_sge_stop(adapter_t *sc)
{
	int i, nqsets;

	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);

	if (sc->tq == NULL)
		return;

	for (nqsets = i = 0; i < (sc)->params.nports; i++)
		nqsets += sc->port[i].nqsets;
#ifdef notyet
	/*
	 *
	 * XXX
	 */
	for (i = 0; i < nqsets; ++i) {
		struct sge_qset *qs = &sc->sge.qs[i];

		taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
		taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
	}
#endif
}

/**
 *	t3_free_tx_desc - reclaims Tx descriptors and their buffers
 *	@q: the Tx queue to reclaim descriptors from
 *	@reclaimable: the number of descriptors to reclaim
 *
 *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 *	Tx buffers.  Called with the Tx queue lock held.
 */
void
t3_free_tx_desc(struct sge_txq *q, int reclaimable)
{
	struct tx_sw_desc *txsd;
	unsigned int cidx;

#ifdef T3_TRACE
	T3_TRACE2(sc->tb[q->cntxt_id & 7],
		  "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
#endif
	cidx = q->cidx;
	txsd = &q->sdesc[cidx];
	DPRINTF("reclaiming %d WR\n", reclaimable);
	mtx_assert(&q->lock, MA_OWNED);
	while (reclaimable--) {
		DPRINTF("cidx=%d d=%p\n", cidx, txsd);
		if (txsd->mi.mi_base != NULL) {
			if (txsd->flags & TX_SW_DESC_MAPPED) {
				bus_dmamap_unload(q->entry_tag, txsd->map);
				txsd->flags &= ~TX_SW_DESC_MAPPED;
			}
			m_freem_iovec(&txsd->mi);
			buf_ring_scan(&q->txq_mr, txsd->mi.mi_base, __FILE__, __LINE__);
			txsd->mi.mi_base = NULL;

#if defined(DIAGNOSTIC) && 0
			if (m_get_priority(txsd->m[0]) != cidx)
				printf("pri=%d cidx=%d\n",
				    (int)m_get_priority(txsd->m[0]), cidx);
#endif

		} else
			q->txq_skipped++;

		++txsd;
		if (++cidx == q->size) {
			cidx = 0;
			txsd = q->sdesc;
		}
	}
	q->cidx = cidx;

}

void
t3_free_tx_desc_all(struct sge_txq *q)
{
	int i;
	struct tx_sw_desc *txsd;

	for (i = 0; i < q->size; i++) {
		txsd = &q->sdesc[i];
		if (txsd->mi.mi_base != NULL) {
			if (txsd->flags & TX_SW_DESC_MAPPED) {
				bus_dmamap_unload(q->entry_tag, txsd->map);
				txsd->flags &= ~TX_SW_DESC_MAPPED;
			}
			m_freem_iovec(&txsd->mi);
			bzero(&txsd->mi, sizeof(txsd->mi));
		}
	}
}

/**
 *	is_new_response - check if a response is newly written
 *	@r: the response descriptor
 *	@q: the response queue
 *
 *	Returns true if a response descriptor contains a yet unprocessed
 *	response.
 */
static __inline int
is_new_response(const struct rsp_desc *r,
    const struct sge_rspq *q)
{
	return (r->intr_gen & F_RSPD_GEN2) == q->gen;
}

#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
			V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))

/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
#define NOMEM_INTR_DELAY 2500
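/*
 * The response ring uses the same generation trick as the Tx rings:
 * software flips q->gen on every wrap of the consumer index, so a
 * descriptor whose F_RSPD_GEN2 bit matches q->gen must have been written
 * by hardware since the last pass and is therefore new.
 */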
/**
 *	write_ofld_wr - write an offload work request
 *	@adap: the adapter
 *	@m: the packet to send
 *	@q: the Tx queue
 *	@pidx: index of the first Tx descriptor to write
 *	@gen: the generation value to use
 *	@ndesc: number of descriptors the packet will occupy
 *
 *	Write an offload work request to send the supplied packet.  The packet
 *	data already carry the work request with most fields populated.
 */
static void
write_ofld_wr(adapter_t *adap, struct mbuf *m,
    struct sge_txq *q, unsigned int pidx,
    unsigned int gen, unsigned int ndesc,
    bus_dma_segment_t *segs, unsigned int nsegs)
{
	unsigned int sgl_flits, flits;
	struct work_request_hdr *from;
	struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
	struct tx_desc *d = &q->desc[pidx];
	struct txq_state txqs;

	if (immediate(m) && segs == NULL) {
		write_imm(d, m, m->m_len, gen);
		return;
	}

	/* Only TX_DATA builds SGLs */
	from = mtod(m, struct work_request_hdr *);
	memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));

	flits = m->m_len / 8;
	sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;

	make_sgl(sgp, segs, nsegs);
	sgl_flits = sgl_len(nsegs);

	txqs.gen = gen;
	txqs.pidx = pidx;
	txqs.compl = 0;

	write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
	    from->wr_hi, from->wr_lo);
}

/**
 *	calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
 *	@m: the packet
 *
 *	Returns the number of Tx descriptors needed for the given offload
 *	packet.  These packets are already fully constructed.
 */
static __inline unsigned int
calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
{
	unsigned int flits, cnt = 0;


	if (m->m_len <= WR_LEN)
		return 1;                 /* packet fits as immediate data */

	if (m->m_flags & M_IOVEC)
		cnt = mtomv(m)->mv_count;

	/* headers */
	flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8;

	return flits_to_desc(flits + sgl_len(cnt));
}

/**
 *	ofld_xmit - send a packet through an offload queue
 *	@adap: the adapter
 *	@q: the Tx offload queue
 *	@m: the packet
 *
 *	Send an offload packet through an SGE offload queue.
 */
static int
ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
{
	int ret, nsegs;
	unsigned int ndesc;
	unsigned int pidx, gen;
	bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
	struct tx_sw_desc *stx;

	nsegs = m_get_sgllen(m);
	vsegs = m_get_sgl(m);
	ndesc = calc_tx_descs_ofld(m, nsegs);
	busdma_map_sgl(vsegs, segs, nsegs);

	stx = &q->sdesc[q->pidx];
	KASSERT(stx->mi.mi_base == NULL, ("mi_base set"));

	mtx_lock(&q->lock);
again:	reclaim_completed_tx_(q, 16);
	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
	if (__predict_false(ret)) {
		if (ret == 1) {
			printf("no ofld desc avail\n");

			m_set_priority(m, ndesc);     /* save for restart */
			mtx_unlock(&q->lock);
			return (EINTR);
		}
		goto again;
	}

	gen = q->gen;
	q->in_use += ndesc;
	pidx = q->pidx;
	q->pidx += ndesc;
	if (q->pidx >= q->size) {
		q->pidx -= q->size;
		q->gen ^= 1;
	}
#ifdef T3_TRACE
	T3_TRACE5(adap->tb[q->cntxt_id & 7],
		  "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
		  ndesc, pidx, skb->len, skb->len - skb->data_len,
		  skb_shinfo(skb)->nr_frags);
#endif
	mtx_unlock(&q->lock);

	write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
	check_ring_tx_db(adap, q);

	return (0);
}
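/*
 * Restart note: when check_desc_avail queues the packet (ret == 1),
 * ofld_xmit first stashes the computed descriptor count in the mbuf
 * priority field via m_set_priority(); restart_offloadq below reads it
 * back with m_get_priority() instead of recalculating.
 */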
/**
 *	restart_offloadq - restart a suspended offload queue
 *	@qs: the queue set containing the offload queue
 *
 *	Resumes transmission on a suspended Tx offload queue.
 */
static void
restart_offloadq(void *data, int npending)
{
	struct mbuf *m;
	struct sge_qset *qs = data;
	struct sge_txq *q = &qs->txq[TXQ_OFLD];
	adapter_t *adap = qs->port->adapter;
	bus_dma_segment_t segs[TX_MAX_SEGS];
	struct tx_sw_desc *stx = &q->sdesc[q->pidx];
	int nsegs, cleaned;

	mtx_lock(&q->lock);
again:	cleaned = reclaim_completed_tx_(q, 16);

	while ((m = mbufq_peek(&q->sendq)) != NULL) {
		unsigned int gen, pidx;
		unsigned int ndesc = m_get_priority(m);

		if (__predict_false(q->size - q->in_use < ndesc)) {
			setbit(&qs->txq_stopped, TXQ_OFLD);
			smp_mb();

			if (should_restart_tx(q) &&
			    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
				goto again;
			q->stops++;
			break;
		}

		gen = q->gen;
		q->in_use += ndesc;
		pidx = q->pidx;
		q->pidx += ndesc;
		if (q->pidx >= q->size) {
			q->pidx -= q->size;
			q->gen ^= 1;
		}

		(void)mbufq_dequeue(&q->sendq);
		busdma_map_mbufs(&m, q, stx, segs, &nsegs);
		mtx_unlock(&q->lock);
		write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
		mtx_lock(&q->lock);
	}
	mtx_unlock(&q->lock);

#if USE_GTS
	set_bit(TXQ_RUNNING, &q->flags);
	set_bit(TXQ_LAST_PKT_DB, &q->flags);
#endif
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}

/**
 *	queue_set - return the queue set a packet should use
 *	@m: the packet
 *
 *	Maps a packet to the SGE queue set it should use.  The desired queue
 *	set is carried in bits 1-3 in the packet's priority.
 */
static __inline int
queue_set(const struct mbuf *m)
{
	return m_get_priority(m) >> 1;
}

/**
 *	is_ctrl_pkt - return whether an offload packet is a control packet
 *	@m: the packet
 *
 *	Determines whether an offload packet should use an OFLD or a CTRL
 *	Tx queue.  This is indicated by bit 0 in the packet's priority.
 */
static __inline int
is_ctrl_pkt(const struct mbuf *m)
{
	return m_get_priority(m) & 1;
}

/**
 *	t3_offload_tx - send an offload packet
 *	@tdev: the offload device to send to
 *	@m: the packet
 *
 *	Sends an offload packet.  We use the packet priority to select the
 *	appropriate Tx queue as follows: bit 0 indicates whether the packet
 *	should be sent as regular or control, bits 1-3 select the queue set.
 */
int
t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
{
	adapter_t *adap = tdev2adap(tdev);
	struct sge_qset *qs = &adap->sge.qs[queue_set(m)];

	if (__predict_false(is_ctrl_pkt(m)))
		return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m);

	return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m);
}
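/*
 * Illustrative sketch, not compiled into the driver: how a sender would pack
 * the priority field that t3_offload_tx() above consumes.  Bit 0 selects
 * control vs. regular offload, bits 1-3 select the queue set;
 * queue_set() and is_ctrl_pkt() simply undo this packing.  The helper name
 * is hypothetical.
 */
#if 0
static void
encode_ofld_priority_example(struct mbuf *m, int qset_idx, int is_ctrl)
{
	/* pack: bits 3..1 = queue set index, bit 0 = control flag */
	m_set_priority(m, ((qset_idx & 0x7) << 1) | (is_ctrl & 1));

	/* unpack, exactly as queue_set()/is_ctrl_pkt() do */
	KASSERT(queue_set(m) == (qset_idx & 0x7), ("qset mismatch"));
	KASSERT(is_ctrl_pkt(m) == (is_ctrl & 1), ("ctrl mismatch"));
}
#endif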
/**
 *	deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
 *	@tdev: the offload device that will be receiving the packets
 *	@q: the SGE response queue that assembled the bundle
 *	@mbufs: the packets in the partial bundle
 *	@n: the number of packets in the bundle
 *
 *	Delivers a (partial) bundle of Rx offload packets to an offload device.
 */
static __inline void
deliver_partial_bundle(struct t3cdev *tdev,
    struct sge_rspq *q,
    struct mbuf *mbufs[], int n)
{
	if (n) {
		q->offload_bundles++;
		cxgb_ofld_recv(tdev, mbufs, n);
	}
}

static __inline int
rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
    struct mbuf *m, struct mbuf *rx_gather[],
    unsigned int gather_idx)
{
	rq->offload_pkts++;
	m->m_pkthdr.header = mtod(m, void *);
	rx_gather[gather_idx++] = m;
	if (gather_idx == RX_BUNDLE_SIZE) {
		cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
		gather_idx = 0;
		rq->offload_bundles++;
	}
	return (gather_idx);
}

static void
restart_tx(struct sge_qset *qs)
{
	struct adapter *sc = qs->port->adapter;

	if (isset(&qs->txq_stopped, TXQ_OFLD) &&
	    should_restart_tx(&qs->txq[TXQ_OFLD]) &&
	    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
		qs->txq[TXQ_OFLD].restarts++;
		DPRINTF("restarting TXQ_OFLD\n");
		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
	}
	DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
	    qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
	    qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
	    qs->txq[TXQ_CTRL].in_use);

	if (isset(&qs->txq_stopped, TXQ_CTRL) &&
	    should_restart_tx(&qs->txq[TXQ_CTRL]) &&
	    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
		qs->txq[TXQ_CTRL].restarts++;
		DPRINTF("restarting TXQ_CTRL\n");
		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
	}
}
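/*
 * Illustrative sketch, not compiled into the driver: the gather/flush
 * pattern that rx_offload() above and deliver_partial_bundle() implement
 * between them.  Offload packets are batched into a small array and handed
 * up RX_BUNDLE_SIZE at a time to amortize the per-call cost; whatever is
 * left at the end of a processing pass goes up as a partial bundle.  The
 * helper and its next_pkt callback are hypothetical.
 */
#if 0
static void
bundle_loop_example(struct t3cdev *tdev, struct sge_rspq *rq,
    struct mbuf *(*next_pkt)(void))
{
	struct mbuf *bundle[RX_BUNDLE_SIZE], *m;
	int n = 0;

	while ((m = next_pkt()) != NULL) {
		bundle[n++] = m;
		if (n == RX_BUNDLE_SIZE) {	/* full: flush the batch */
			cxgb_ofld_recv(tdev, bundle, n);
			rq->offload_bundles++;
			n = 0;
		}
	}
	if (n) {				/* trailing partial bundle */
		cxgb_ofld_recv(tdev, bundle, n);
		rq->offload_bundles++;
	}
}
#endif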
/**
 *	t3_sge_alloc_qset - initialize an SGE queue set
 *	@sc: the controller softc
 *	@id: the queue set id
 *	@nports: how many Ethernet ports will be using this queue set
 *	@irq_vec_idx: the IRQ vector index for response queue interrupts
 *	@p: configuration parameters for this queue set
 *	@ntxq: number of Tx queues for the queue set
 *	@pi: port info for queue set
 *
 *	Allocate resources and initialize an SGE queue set.  A queue set
 *	comprises a response queue, two Rx free-buffer queues, and up to 3
 *	Tx queues.  The Tx queues are assigned roles in the order Ethernet
 *	queue, offload queue, and control queue.
 */
int
t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
    const struct qset_params *p, int ntxq, struct port_info *pi)
{
	struct sge_qset *q = &sc->sge.qs[id];
	int i, header_size, ret = 0;

	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
		if ((q->txq[i].txq_mr.br_ring = malloc(cxgb_txq_buf_ring_size*sizeof(struct mbuf *),
			    M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) {
			device_printf(sc->dev, "failed to allocate mbuf ring\n");
			goto err;
		}
		q->txq[i].txq_mr.br_prod = q->txq[i].txq_mr.br_cons = 0;
		q->txq[i].txq_mr.br_size = cxgb_txq_buf_ring_size;
		mtx_init(&q->txq[i].txq_mr.br_lock, "txq mbuf ring", NULL, MTX_DEF);
	}

	init_qset_cntxt(q, id);
	q->idx = id;

	if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
		    sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
		    &q->fl[0].desc, &q->fl[0].sdesc,
		    &q->fl[0].desc_tag, &q->fl[0].desc_map,
		    sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
		printf("error %d from alloc ring fl0\n", ret);
		goto err;
	}

	if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
		    sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
		    &q->fl[1].desc, &q->fl[1].sdesc,
		    &q->fl[1].desc_tag, &q->fl[1].desc_map,
		    sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
		printf("error %d from alloc ring fl1\n", ret);
		goto err;
	}

	if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
		    &q->rspq.phys_addr, &q->rspq.desc, NULL,
		    &q->rspq.desc_tag, &q->rspq.desc_map,
		    NULL, NULL)) != 0) {
		printf("error %d from alloc ring rspq\n", ret);
		goto err;
	}

	for (i = 0; i < ntxq; ++i) {
		/*
		 * The control queue always uses immediate data so does not
		 * need to keep track of any mbufs.
		 * XXX Placeholder for future TOE support.
		 */
		size_t sz = i == TXQ_CTRL ?
0 : sizeof(struct tx_sw_desc); 2258 2259 if ((ret = alloc_ring(sc, p->txq_size[i], 2260 sizeof(struct tx_desc), sz, 2261 &q->txq[i].phys_addr, &q->txq[i].desc, 2262 &q->txq[i].sdesc, &q->txq[i].desc_tag, 2263 &q->txq[i].desc_map, 2264 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 2265 printf("error %d from alloc ring tx %i\n", ret, i); 2266 goto err; 2267 } 2268 mbufq_init(&q->txq[i].sendq); 2269 q->txq[i].gen = 1; 2270 q->txq[i].size = p->txq_size[i]; 2271 snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d", 2272 device_get_unit(sc->dev), irq_vec_idx, i); 2273 MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF); 2274 } 2275 2276 q->txq[TXQ_ETH].port = pi; 2277 2278 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); 2279 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); 2280 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]); 2281 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]); 2282 2283 q->fl[0].gen = q->fl[1].gen = 1; 2284 q->fl[0].size = p->fl_size; 2285 q->fl[1].size = p->jumbo_size; 2286 2287 q->rspq.gen = 1; 2288 q->rspq.cidx = 0; 2289 q->rspq.size = p->rspq_size; 2290 2291 2292 header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t); 2293 q->txq[TXQ_ETH].stop_thres = nports * 2294 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 2295 2296 q->fl[0].buf_size = (MCLBYTES - header_size); 2297 q->fl[0].zone = zone_clust; 2298 q->fl[0].type = EXT_CLUSTER; 2299#if __FreeBSD_version > 800000 2300 if (cxgb_use_16k_clusters) { 2301 q->fl[1].buf_size = MJUM16BYTES - header_size; 2302 q->fl[1].zone = zone_jumbo16; 2303 q->fl[1].type = EXT_JUMBO16; 2304 } else { 2305 q->fl[1].buf_size = MJUM9BYTES - header_size; 2306 q->fl[1].zone = zone_jumbo9; 2307 q->fl[1].type = EXT_JUMBO9; 2308 } 2309#else 2310 q->fl[1].buf_size = MJUMPAGESIZE - header_size; 2311 q->fl[1].zone = zone_jumbop; 2312 q->fl[1].type = EXT_JUMBOP; 2313#endif 2314 q->lro.enabled = lro_default; 2315 2316 mtx_lock(&sc->sge.reg_lock); 2317 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 2318 q->rspq.phys_addr, q->rspq.size, 2319 q->fl[0].buf_size, 1, 0); 2320 if (ret) { 2321 printf("error %d from t3_sge_init_rspcntxt\n", ret); 2322 goto err_unlock; 2323 } 2324 2325 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2326 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 2327 q->fl[i].phys_addr, q->fl[i].size, 2328 q->fl[i].buf_size, p->cong_thres, 1, 2329 0); 2330 if (ret) { 2331 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 2332 goto err_unlock; 2333 } 2334 } 2335 2336 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 2337 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 2338 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 2339 1, 0); 2340 if (ret) { 2341 printf("error %d from t3_sge_init_ecntxt\n", ret); 2342 goto err_unlock; 2343 } 2344 2345 if (ntxq > 1) { 2346 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 2347 USE_GTS, SGE_CNTXT_OFLD, id, 2348 q->txq[TXQ_OFLD].phys_addr, 2349 q->txq[TXQ_OFLD].size, 0, 1, 0); 2350 if (ret) { 2351 printf("error %d from t3_sge_init_ecntxt\n", ret); 2352 goto err_unlock; 2353 } 2354 } 2355 2356 if (ntxq > 2) { 2357 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 2358 SGE_CNTXT_CTRL, id, 2359 q->txq[TXQ_CTRL].phys_addr, 2360 q->txq[TXQ_CTRL].size, 2361 q->txq[TXQ_CTRL].token, 1, 0); 2362 if (ret) { 2363 printf("error %d from t3_sge_init_ecntxt\n", ret); 2364 goto 
err_unlock; 2365 } 2366 } 2367 2368 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d", 2369 device_get_unit(sc->dev), irq_vec_idx); 2370 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF); 2371 2372 mtx_unlock(&sc->sge.reg_lock); 2373 t3_update_qset_coalesce(q, p); 2374 q->port = pi; 2375 2376 refill_fl(sc, &q->fl[0], q->fl[0].size); 2377 refill_fl(sc, &q->fl[1], q->fl[1].size); 2378 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 2379 2380 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 2381 V_NEWTIMER(q->rspq.holdoff_tmr)); 2382 2383 return (0); 2384 2385err_unlock: 2386 mtx_unlock(&sc->sge.reg_lock); 2387err: 2388 t3_free_qset(sc, q); 2389 2390 return (ret); 2391} 2392 2393void 2394t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad) 2395{ 2396 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); 2397 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]]; 2398 struct ifnet *ifp = pi->ifp; 2399 2400 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff); 2401 2402 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment && 2403 cpl->csum_valid && cpl->csum == 0xffff) { 2404 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); 2405 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; 2406 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR); 2407 m->m_pkthdr.csum_data = 0xffff; 2408 } 2409 /* 2410 * XXX need to add VLAN support for 6.x 2411 */ 2412#ifdef VLAN_SUPPORTED 2413 if (__predict_false(cpl->vlan_valid)) { 2414 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 2415 m->m_flags |= M_VLANTAG; 2416 } 2417#endif 2418 2419 m->m_pkthdr.rcvif = ifp; 2420 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad; 2421#ifndef DISABLE_MBUF_IOVEC 2422 m_explode(m); 2423#endif 2424 /* 2425 * adjust after conversion to mbuf chain 2426 */ 2427 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad); 2428 m->m_len -= (sizeof(*cpl) + ethpad); 2429 m->m_data += (sizeof(*cpl) + ethpad); 2430 2431 (*ifp->if_input)(ifp, m); 2432} 2433 2434static void 2435ext_free_handler(void *cl, void * arg) 2436{ 2437 uintptr_t type = (uintptr_t)arg; 2438 uma_zone_t zone; 2439 struct mbuf *m; 2440 2441 m = cl; 2442 zone = m_getzonefromtype(type); 2443 m->m_ext.ext_type = (int)type; 2444 cxgb_ext_freed++; 2445 cxgb_cache_put(zone, cl); 2446} 2447 2448static void 2449init_cluster_mbuf(caddr_t cl, int flags, int type, uma_zone_t zone) 2450{ 2451 struct mbuf *m; 2452 int header_size; 2453 2454 header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + 2455 sizeof(struct m_ext_) + sizeof(uint32_t); 2456 2457 bzero(cl, header_size); 2458 m = (struct mbuf *)cl; 2459 2460 SLIST_INIT(&m->m_pkthdr.tags); 2461 m->m_type = MT_DATA; 2462 m->m_flags = flags | M_NOFREE | M_EXT; 2463 m->m_data = cl + header_size; 2464 m->m_ext.ext_buf = cl; 2465 m->m_ext.ref_cnt = (uint32_t *)(cl + header_size - sizeof(uint32_t)); 2466 m->m_ext.ext_size = m_getsizefromtype(type); 2467 m->m_ext.ext_free = ext_free_handler; 2468 m->m_ext.ext_args = (void *)(uintptr_t)type; 2469 m->m_ext.ext_type = EXT_EXTREF; 2470 *(m->m_ext.ref_cnt) = 1; 2471 DPRINTF("data=%p ref_cnt=%p\n", m->m_data, m->m_ext.ref_cnt); 2472} 2473 2474 2475/** 2476 * get_packet - return the next ingress packet buffer from a free list 2477 * @adap: the adapter that received the packet 2478 * @drop_thres: # of remaining buffers before we start dropping packets 2479 * @qs: the qset that the SGE free list holding the packet belongs to 2480 * @mh: the 
mbuf header, contains a pointer to the head and tail of the mbuf chain
 *	@r: response descriptor
 *
 *	Get the next packet from a free list and complete setup of the
 *	mbuf.  If the packet is small we make a copy and recycle the
 *	original buffer, otherwise we use the original buffer itself.  If a
 *	positive drop threshold is supplied packets are dropped and their
 *	buffers recycled if (a) the number of remaining buffers is under the
 *	threshold and the packet is too big to copy, or (b) the packet should
 *	be copied but there is no memory for the copy.
 */
#ifdef DISABLE_MBUF_IOVEC

static int
get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
    struct t3_mbuf_hdr *mh, struct rsp_desc *r)
{
	unsigned int len_cq = ntohl(r->len_cq);
	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
	uint32_t len = G_RSPD_LEN(len_cq);
	uint32_t flags = ntohl(r->flags);
	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
	struct mbuf *m, *m0;
	caddr_t cl;
	int mflags;
	int ret = 0;

	prefetch(sd->rxsd_cl);

	fl->credits--;
	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);

	if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
		if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
			goto skip_recycle;
		cl = mtod(m0, caddr_t);
		memcpy(cl, sd->data, len);
		recycle_rx_buf(adap, fl, fl->cidx);
		m = m0;
		m0->m_len = len;
	} else {
	skip_recycle:
		mflags = 0;
		bus_dmamap_unload(fl->entry_tag, sd->map);
		cl = sd->rxsd_cl;
		m = m0 = (struct mbuf *)cl;

		if ((sopeop == RSPQ_SOP_EOP) ||
		    (sopeop == RSPQ_SOP))
			mflags = M_PKTHDR;
		init_cluster_mbuf(cl, mflags, fl->type, fl->zone);
		m0->m_len = len;	/* set after init_cluster_mbuf() zeroes the header */
	}

	switch(sopeop) {
	case RSPQ_SOP_EOP:
		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
		mh->mh_head = mh->mh_tail = m;
		m->m_pkthdr.len = len;
		ret = 1;
		break;
	case RSPQ_NSOP_NEOP:
		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
		if (mh->mh_tail == NULL) {
			printf("discarding intermediate descriptor entry\n");
			m_freem(m);
			break;
		}
		mh->mh_tail->m_next = m;
		mh->mh_tail = m;
		mh->mh_head->m_pkthdr.len += len;
		ret = 0;
		break;
	case RSPQ_SOP:
		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
		m->m_pkthdr.len = len;
		mh->mh_head = mh->mh_tail = m;
		ret = 0;
		break;
	case RSPQ_EOP:
		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
		mh->mh_head->m_pkthdr.len += len;
		mh->mh_tail->m_next = m;
		mh->mh_tail = m;
		ret = 1;
		break;
	}
	if (++fl->cidx == fl->size)
		fl->cidx = 0;

	return (ret);
}

#else

static int
get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
    struct mbuf **m, struct rsp_desc *r)
{
	unsigned int len_cq = ntohl(r->len_cq);
	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ?
&qs->fl[1] : &qs->fl[0]; 2582 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; 2583 uint32_t len = G_RSPD_LEN(len_cq); 2584 uint32_t flags = ntohl(r->flags); 2585 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 2586 void *cl; 2587 int ret = 0; 2588 struct mbuf *m0; 2589#if 0 2590 if ((sd + 1 )->rxsd_cl) 2591 prefetch((sd + 1)->rxsd_cl); 2592 if ((sd + 2)->rxsd_cl) 2593 prefetch((sd + 2)->rxsd_cl); 2594#endif 2595 DPRINTF("rx cpu=%d\n", curcpu); 2596 fl->credits--; 2597 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2598 2599 if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) { 2600 if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) 2601 goto skip_recycle; 2602 cl = mtod(m0, void *); 2603 memcpy(cl, sd->data, len); 2604 recycle_rx_buf(adap, fl, fl->cidx); 2605 *m = m0; 2606 } else { 2607 skip_recycle: 2608 bus_dmamap_unload(fl->entry_tag, sd->map); 2609 cl = sd->rxsd_cl; 2610 *m = m0 = (struct mbuf *)cl; 2611 } 2612 2613 switch(sopeop) { 2614 case RSPQ_SOP_EOP: 2615 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m)); 2616 if (cl == sd->rxsd_cl) 2617 init_cluster_mbuf(cl, M_PKTHDR, fl->type, fl->zone); 2618 m0->m_len = m0->m_pkthdr.len = len; 2619 ret = 1; 2620 goto done; 2621 break; 2622 case RSPQ_NSOP_NEOP: 2623 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m)); 2624 panic("chaining unsupported"); 2625 ret = 0; 2626 break; 2627 case RSPQ_SOP: 2628 DBG(DBG_RX, ("get_packet: SOP m %p\n", m)); 2629 panic("chaining unsupported"); 2630 m_iovinit(m0); 2631 ret = 0; 2632 break; 2633 case RSPQ_EOP: 2634 DBG(DBG_RX, ("get_packet: EOP m %p\n", m)); 2635 panic("chaining unsupported"); 2636 ret = 1; 2637 break; 2638 } 2639 panic("append not supported"); 2640#if 0 2641 m_iovappend(m0, cl, fl->buf_size, len, sizeof(uint32_t), sd->rxsd_ref); 2642#endif 2643done: 2644 if (++fl->cidx == fl->size) 2645 fl->cidx = 0; 2646 2647 return (ret); 2648} 2649#endif 2650/** 2651 * handle_rsp_cntrl_info - handles control information in a response 2652 * @qs: the queue set corresponding to the response 2653 * @flags: the response control flags 2654 * 2655 * Handles the control information of an SGE response, such as GTS 2656 * indications and completion credits for the queue set's Tx queues. 2657 * HW coalesces credits, we don't do any extra SW coalescing. 2658 */ 2659static __inline void 2660handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 2661{ 2662 unsigned int credits; 2663 2664#if USE_GTS 2665 if (flags & F_RSPD_TXQ0_GTS) 2666 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 2667#endif 2668 credits = G_RSPD_TXQ0_CR(flags); 2669 if (credits) 2670 qs->txq[TXQ_ETH].processed += credits; 2671 2672 credits = G_RSPD_TXQ2_CR(flags); 2673 if (credits) 2674 qs->txq[TXQ_CTRL].processed += credits; 2675 2676# if USE_GTS 2677 if (flags & F_RSPD_TXQ1_GTS) 2678 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 2679# endif 2680 credits = G_RSPD_TXQ1_CR(flags); 2681 if (credits) 2682 qs->txq[TXQ_OFLD].processed += credits; 2683 2684} 2685 2686static void 2687check_ring_db(adapter_t *adap, struct sge_qset *qs, 2688 unsigned int sleeping) 2689{ 2690 ; 2691} 2692 2693/** 2694 * process_responses - process responses from an SGE response queue 2695 * @adap: the adapter 2696 * @qs: the queue set to which the response queue belongs 2697 * @budget: how many responses can be processed in this round 2698 * 2699 * Process responses from an SGE response queue up to the supplied budget. 
2700 * Responses include received packets as well as credits and other events 2701 * for the queues that belong to the response queue's queue set. 2702 * A negative budget is effectively unlimited. 2703 * 2704 * Additionally choose the interrupt holdoff time for the next interrupt 2705 * on this queue. If the system is under memory shortage use a fairly 2706 * long delay to help recovery. 2707 */ 2708int 2709process_responses(adapter_t *adap, struct sge_qset *qs, int budget) 2710{ 2711 struct sge_rspq *rspq = &qs->rspq; 2712 struct rsp_desc *r = &rspq->desc[rspq->cidx]; 2713 int budget_left = budget; 2714 unsigned int sleeping = 0; 2715 int lro = qs->lro.enabled; 2716 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE]; 2717 int ngathered = 0; 2718#ifdef DEBUG 2719 static int last_holdoff = 0; 2720 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) { 2721 printf("next_holdoff=%d\n", rspq->holdoff_tmr); 2722 last_holdoff = rspq->holdoff_tmr; 2723 } 2724#endif 2725 rspq->next_holdoff = rspq->holdoff_tmr; 2726 2727 while (__predict_true(budget_left && is_new_response(r, rspq))) { 2728 int eth, eop = 0, ethpad = 0; 2729 uint32_t flags = ntohl(r->flags); 2730 uint32_t rss_csum = *(const uint32_t *)r; 2731 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val); 2732 2733 eth = (r->rss_hdr.opcode == CPL_RX_PKT); 2734 2735 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { 2736 /* XXX */ 2737 printf("async notification\n"); 2738 2739 } else if (flags & F_RSPD_IMM_DATA_VALID) { 2740 struct mbuf *m = NULL; 2741 2742#ifdef DISABLE_MBUF_IOVEC 2743 DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n", 2744 r->rss_hdr.opcode, rspq->cidx); 2745 2746 m = rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA); 2747 if (m == NULL) { 2748 rspq->next_holdoff = NOMEM_INTR_DELAY; 2749 budget_left--; 2750 break; 2751 } 2752 2753 get_imm_packet(adap, r, &rspq->rspq_mh); 2754 eop = 1; 2755#else 2756 DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n", 2757 r->rss_hdr.opcode, rspq->cidx); 2758 if (rspq->rspq_mbuf == NULL) 2759 rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA); 2760 else 2761 m = m_gethdr(M_DONTWAIT, MT_DATA); 2762 2763 /* 2764 * XXX revisit me 2765 */ 2766 if (rspq->rspq_mbuf == NULL && m == NULL) { 2767 rspq->next_holdoff = NOMEM_INTR_DELAY; 2768 budget_left--; 2769 break; 2770 } 2771 get_imm_packet(adap, r, rspq->rspq_mbuf, m, flags); 2772 2773 eop = 1; 2774 rspq->imm_data++; 2775#endif 2776 } else if (r->len_cq) { 2777 int drop_thresh = eth ? 
SGE_RX_DROP_THRES : 0; 2778 2779#ifdef DISABLE_MBUF_IOVEC 2780 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r); 2781#else 2782 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mbuf, r); 2783#ifdef IFNET_MULTIQUEUE 2784 rspq->rspq_mbuf->m_pkthdr.rss_hash = rss_hash; 2785#endif 2786#endif 2787 ethpad = 2; 2788 } else { 2789 DPRINTF("pure response\n"); 2790 rspq->pure_rsps++; 2791 } 2792 2793 if (flags & RSPD_CTRL_MASK) { 2794 sleeping |= flags & RSPD_GTS_MASK; 2795 handle_rsp_cntrl_info(qs, flags); 2796 } 2797 2798 r++; 2799 if (__predict_false(++rspq->cidx == rspq->size)) { 2800 rspq->cidx = 0; 2801 rspq->gen ^= 1; 2802 r = rspq->desc; 2803 } 2804 prefetch(r); 2805 if (++rspq->credits >= (rspq->size / 4)) { 2806 refill_rspq(adap, rspq, rspq->credits); 2807 rspq->credits = 0; 2808 } 2809 DPRINTF("eth=%d eop=%d flags=0x%x\n", eth, eop, flags); 2810 2811 if (!eth && eop) { 2812 rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum; 2813 /* 2814 * XXX size mismatch 2815 */ 2816 m_set_priority(rspq->rspq_mh.mh_head, rss_hash); 2817 2818 ngathered = rx_offload(&adap->tdev, rspq, 2819 rspq->rspq_mh.mh_head, offload_mbufs, ngathered); 2820 rspq->rspq_mh.mh_head = NULL; 2821 DPRINTF("received offload packet\n"); 2822 2823 } else if (eth && eop) { 2824 prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *)); 2825 prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *) + L1_CACHE_BYTES); 2826 2827 t3_rx_eth_lro(adap, rspq, rspq->rspq_mh.mh_head, ethpad, 2828 rss_hash, rss_csum, lro); 2829 DPRINTF("received tunnel packet\n"); 2830 rspq->rspq_mh.mh_head = NULL; 2831 2832 } 2833 __refill_fl_lt(adap, &qs->fl[0], 32); 2834 __refill_fl_lt(adap, &qs->fl[1], 32); 2835 --budget_left; 2836 } 2837 2838 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered); 2839 t3_lro_flush(adap, qs, &qs->lro); 2840 2841 if (sleeping) 2842 check_ring_db(adap, qs, sleeping); 2843 2844 smp_mb(); /* commit Tx queue processed updates */ 2845 if (__predict_false(qs->txq_stopped > 1)) { 2846 printf("restarting tx on %p\n", qs); 2847 2848 restart_tx(qs); 2849 } 2850 2851 __refill_fl_lt(adap, &qs->fl[0], 512); 2852 __refill_fl_lt(adap, &qs->fl[1], 512); 2853 budget -= budget_left; 2854 return (budget); 2855} 2856 2857/* 2858 * A helper function that processes responses and issues GTS. 2859 */ 2860static __inline int 2861process_responses_gts(adapter_t *adap, struct sge_rspq *rq) 2862{ 2863 int work; 2864 static int last_holdoff = 0; 2865 2866 work = process_responses(adap, rspq_to_qset(rq), -1); 2867 2868 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { 2869 printf("next_holdoff=%d\n", rq->next_holdoff); 2870 last_holdoff = rq->next_holdoff; 2871 } 2872 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | 2873 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); 2874 2875 return (work); 2876} 2877 2878 2879/* 2880 * Interrupt handler for legacy INTx interrupts for T3B-based cards. 2881 * Handles data events from SGE response queues as well as error and other 2882 * async events as they all use the same interrupt pin. We use one SGE 2883 * response queue per port in this mode and protect all response queues with 2884 * queue 0's lock. 
2885 */ 2886void 2887t3b_intr(void *data) 2888{ 2889 uint32_t i, map; 2890 adapter_t *adap = data; 2891 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 2892 2893 t3_write_reg(adap, A_PL_CLI, 0); 2894 map = t3_read_reg(adap, A_SG_DATA_INTR); 2895 2896 if (!map) 2897 return; 2898 2899 if (__predict_false(map & F_ERRINTR)) 2900 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 2901 2902 mtx_lock(&q0->lock); 2903 for_each_port(adap, i) 2904 if (map & (1 << i)) 2905 process_responses_gts(adap, &adap->sge.qs[i].rspq); 2906 mtx_unlock(&q0->lock); 2907} 2908 2909/* 2910 * The MSI interrupt handler. This needs to handle data events from SGE 2911 * response queues as well as error and other async events as they all use 2912 * the same MSI vector. We use one SGE response queue per port in this mode 2913 * and protect all response queues with queue 0's lock. 2914 */ 2915void 2916t3_intr_msi(void *data) 2917{ 2918 adapter_t *adap = data; 2919 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 2920 int i, new_packets = 0; 2921 2922 mtx_lock(&q0->lock); 2923 2924 for_each_port(adap, i) 2925 if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 2926 new_packets = 1; 2927 mtx_unlock(&q0->lock); 2928 if (new_packets == 0) 2929 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 2930} 2931 2932void 2933t3_intr_msix(void *data) 2934{ 2935 struct sge_qset *qs = data; 2936 adapter_t *adap = qs->port->adapter; 2937 struct sge_rspq *rspq = &qs->rspq; 2938#ifndef IFNET_MULTIQUEUE 2939 mtx_lock(&rspq->lock); 2940#else 2941 if (mtx_trylock(&rspq->lock)) 2942#endif 2943 { 2944 2945 if (process_responses_gts(adap, rspq) == 0) 2946 rspq->unhandled_irqs++; 2947 mtx_unlock(&rspq->lock); 2948 } 2949} 2950 2951#define QDUMP_SBUF_SIZE 32 * 400 2952static int 2953t3_dump_rspq(SYSCTL_HANDLER_ARGS) 2954{ 2955 struct sge_rspq *rspq; 2956 struct sge_qset *qs; 2957 int i, err, dump_end, idx; 2958 static int multiplier = 1; 2959 struct sbuf *sb; 2960 struct rsp_desc *rspd; 2961 uint32_t data[4]; 2962 2963 rspq = arg1; 2964 qs = rspq_to_qset(rspq); 2965 if (rspq->rspq_dump_count == 0) 2966 return (0); 2967 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) { 2968 log(LOG_WARNING, 2969 "dump count is too large %d\n", rspq->rspq_dump_count); 2970 rspq->rspq_dump_count = 0; 2971 return (EINVAL); 2972 } 2973 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) { 2974 log(LOG_WARNING, 2975 "dump start of %d is greater than queue size\n", 2976 rspq->rspq_dump_start); 2977 rspq->rspq_dump_start = 0; 2978 return (EINVAL); 2979 } 2980 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data); 2981 if (err) 2982 return (err); 2983retry_sbufops: 2984 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 2985 2986 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n", 2987 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f), 2988 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1)); 2989 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n", 2990 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]); 2991 2992 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start, 2993 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1)); 2994 2995 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count; 2996 for (i = rspq->rspq_dump_start; i < dump_end; i++) { 2997 idx = i & (RSPQ_Q_SIZE-1); 2998 2999 rspd = &rspq->desc[idx]; 3000 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n", 3001 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx, 3002 rspd->rss_hdr.hash_type, 
be16toh(rspd->rss_hdr.cq_idx));
		sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
		    rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
		    be32toh(rspd->len_cq), rspd->intr_gen);
	}
	if (sbuf_overflowed(sb)) {
		sbuf_delete(sb);
		multiplier++;
		goto retry_sbufops;
	}
	sbuf_finish(sb);
	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
	sbuf_delete(sb);
	return (err);
}

/*
 * broken by recent mbuf changes
 */
static int
t3_dump_txq(SYSCTL_HANDLER_ARGS)
{
	struct sge_txq *txq;
	struct sge_qset *qs;
	int i, j, err, dump_end;
	static int multiplier = 1;
	struct sbuf *sb;
	struct tx_desc *txd;
	uint32_t *WR, wr_hi, wr_lo, gen;
	uint32_t data[4];

	txq = arg1;
	qs = txq_to_qset(txq, TXQ_ETH);
	if (txq->txq_dump_count == 0) {
		return (0);
	}
	if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
		log(LOG_WARNING,
		    "dump count is too large %d\n", txq->txq_dump_count);
		txq->txq_dump_count = 1;
		return (EINVAL);
	}
	if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
		log(LOG_WARNING,
		    "dump start of %d is greater than queue size\n",
		    txq->txq_dump_start);
		txq->txq_dump_start = 0;
		return (EINVAL);
	}
	err = t3_sge_read_ecntxt(qs->port->adapter, txq->cntxt_id, data);
	if (err)
		return (err);

retry_sbufops:
	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);

	sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
	    (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
	    (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
	sbuf_printf(sb, " TUN=%u TOE=%u generation=%u uP token=%u valid=%u\n",
	    ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
	    ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
	    txq->txq_dump_start,
	    (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));

	dump_end = txq->txq_dump_start + txq->txq_dump_count;
	for (i = txq->txq_dump_start; i < dump_end; i++) {
		txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
		WR = (uint32_t *)txd->flit;
		wr_hi = ntohl(WR[0]);
		wr_lo = ntohl(WR[1]);
		gen = G_WR_GEN(wr_lo);

		sbuf_printf(sb, " wr_hi %08x wr_lo %08x gen %d\n",
		    wr_hi, wr_lo, gen);
		for (j = 2; j < 30; j += 4)
			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
	}
	if (sbuf_overflowed(sb)) {
		sbuf_delete(sb);
		multiplier++;
		goto retry_sbufops;
	}
	sbuf_finish(sb);
	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
	sbuf_delete(sb);
	return (err);
}

static int
t3_lro_enable(SYSCTL_HANDLER_ARGS)
{
	adapter_t *sc;
	int i, j, enabled, err, nqsets = 0;

#ifndef LRO_WORKING
	return (0);
#endif
	sc = arg1;
	enabled = sc->sge.qs[0].lro.enabled;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);

	if (err != 0)
		return (err);
	if (enabled == sc->sge.qs[0].lro.enabled)
		return (0);

	for (i = 0; i < sc->params.nports; i++)
		for (j = 0; j < sc->port[i].nqsets; j++)
			nqsets++;

	for (i = 0; i < nqsets; i++)
		sc->sge.qs[i].lro.enabled = enabled;

	return (0);
}

static int
t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS)
{
3128 adapter_t *sc = arg1; 3129 struct qset_params *qsp = &sc->params.sge.qset[0]; 3130 int coalesce_nsecs; 3131 struct sge_qset *qs; 3132 int i, j, err, nqsets = 0; 3133 struct mtx *lock; 3134 3135 if ((sc->flags & FULL_INIT_DONE) == 0) 3136 return (ENXIO); 3137 3138 coalesce_nsecs = qsp->coalesce_nsecs; 3139 err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req); 3140 3141 if (err != 0) { 3142 return (err); 3143 } 3144 if (coalesce_nsecs == qsp->coalesce_nsecs) 3145 return (0); 3146 3147 for (i = 0; i < sc->params.nports; i++) 3148 for (j = 0; j < sc->port[i].nqsets; j++) 3149 nqsets++; 3150 3151 coalesce_nsecs = max(100, coalesce_nsecs); 3152 3153 for (i = 0; i < nqsets; i++) { 3154 qs = &sc->sge.qs[i]; 3155 qsp = &sc->params.sge.qset[i]; 3156 qsp->coalesce_nsecs = coalesce_nsecs; 3157 3158 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 3159 &sc->sge.qs[0].rspq.lock; 3160 3161 mtx_lock(lock); 3162 t3_update_qset_coalesce(qs, qsp); 3163 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | 3164 V_NEWTIMER(qs->rspq.holdoff_tmr)); 3165 mtx_unlock(lock); 3166 } 3167 3168 return (0); 3169} 3170 3171 3172void 3173t3_add_attach_sysctls(adapter_t *sc) 3174{ 3175 struct sysctl_ctx_list *ctx; 3176 struct sysctl_oid_list *children; 3177 3178 ctx = device_get_sysctl_ctx(sc->dev); 3179 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3180 3181 /* random information */ 3182 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3183 "firmware_version", 3184 CTLFLAG_RD, &sc->fw_version, 3185 0, "firmware version"); 3186 3187 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3188 "enable_lro", 3189 CTLTYPE_INT|CTLFLAG_RW, sc, 3190 0, t3_lro_enable, 3191 "I", "enable large receive offload"); 3192 3193 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3194 "enable_debug", 3195 CTLFLAG_RW, &cxgb_debug, 3196 0, "enable verbose debugging output"); 3197 SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tunq_coalesce", 3198 CTLFLAG_RD, &sc->tunq_coalesce, 3199 "#tunneled packets freed"); 3200 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3201 "txq_overrun", 3202 CTLFLAG_RD, &txq_fills, 3203 0, "#times txq overrun"); 3204 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3205 "pcpu_cache_enable", 3206 CTLFLAG_RW, &cxgb_pcpu_cache_enable, 3207 0, "#enable driver local pcpu caches"); 3208 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3209 "cache_alloc", 3210 CTLFLAG_RD, &cxgb_cached_allocations, 3211 0, "#times a cluster was allocated from cache"); 3212 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3213 "cached", 3214 CTLFLAG_RD, &cxgb_cached, 3215 0, "#times a cluster was cached"); 3216 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3217 "ext_freed", 3218 CTLFLAG_RD, &cxgb_ext_freed, 3219 0, "#times a cluster was freed through ext_free"); 3220 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3221 "mbufs_outstanding", 3222 CTLFLAG_RD, &cxgb_mbufs_outstanding, 3223 0, "#mbufs in flight in the driver"); 3224 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3225 "pack_outstanding", 3226 CTLFLAG_RD, &cxgb_pack_outstanding, 3227 0, "#packet in flight in the driver"); 3228} 3229 3230 3231static const char *rspq_name = "rspq"; 3232static const char *txq_names[] = 3233{ 3234 "txq_eth", 3235 "txq_ofld", 3236 "txq_ctrl" 3237}; 3238 3239void 3240t3_add_configured_sysctls(adapter_t *sc) 3241{ 3242 struct sysctl_ctx_list *ctx; 3243 struct sysctl_oid_list *children; 3244 int i, j; 3245 3246 ctx = device_get_sysctl_ctx(sc->dev); 3247 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3248 3249 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3250 "intr_coal", 3251 CTLTYPE_INT|CTLFLAG_RW, sc, 
	    0, t3_set_coalesce_nsecs,
	    "I", "interrupt coalescing timer (ns)");

	for (i = 0; i < sc->params.nports; i++) {
		struct port_info *pi = &sc->port[i];
		struct sysctl_oid *poid;
		struct sysctl_oid_list *poidlist;

		snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
		poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
		    pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
		poidlist = SYSCTL_CHILDREN(poid);
		SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO,
		    "nqsets", CTLFLAG_RD, &pi->nqsets,
		    0, "#queue sets");

		for (j = 0; j < pi->nqsets; j++) {
			struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
			struct sysctl_oid *qspoid, *rspqpoid, *txqpoid;
			struct sysctl_oid_list *qspoidlist, *rspqpoidlist, *txqpoidlist;
			struct sge_txq *txq = &qs->txq[TXQ_ETH];

			snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);

			qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
			    qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
			qspoidlist = SYSCTL_CHILDREN(qspoid);

			rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
			    rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
			rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);

			txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
			    txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
			txqpoidlist = SYSCTL_CHILDREN(txqpoid);

			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
			    CTLFLAG_RD, &qs->rspq.size,
			    0, "#entries in response queue");
			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
			    CTLFLAG_RD, &qs->rspq.cidx,
			    0, "consumer index");
			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
			    CTLFLAG_RD, &qs->rspq.credits,
			    0, "#credits");
			SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
			    CTLFLAG_RD, &qs->rspq.phys_addr,
			    "physical address of the queue");
			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
			    CTLFLAG_RW, &qs->rspq.rspq_dump_start,
			    0, "start rspq dump entry");
			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
			    CTLFLAG_RW, &qs->rspq.rspq_dump_count,
			    0, "#rspq entries to dump");
			SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
			    CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
			    0, t3_dump_rspq, "A", "dump of the response queue");

			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped",
			    CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops,
			    0, "#tunneled packets dropped");
			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
			    CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
			    0, "#tunneled packets waiting to be sent");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
			    0, "txq mbuf ring producer index");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
			    0, "txq mbuf ring consumer index");
			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
			    CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
			    0, "#tunneled packets processed by the card");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
			    CTLFLAG_RD, &txq->cleaned,
			    0, "#tunneled packets cleaned");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
			    CTLFLAG_RD, &txq->in_use,
			    0, "#tunneled packet slots in use");
			SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
			    CTLFLAG_RD, &txq->txq_frees,
"#tunneled packets freed"); 3338 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped", 3339 CTLFLAG_RD, &txq->txq_skipped, 3340 0, "#tunneled packet descriptors skipped"); 3341 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "coalesced", 3342 CTLFLAG_RD, &txq->txq_coalesced, 3343 0, "#tunneled packets coalesced"); 3344 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued", 3345 CTLFLAG_RD, &txq->txq_enqueued, 3346 0, "#tunneled packets enqueued to hardware"); 3347 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags", 3348 CTLFLAG_RD, &qs->txq_stopped, 3349 0, "tx queues stopped"); 3350 SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr", 3351 CTLFLAG_RD, &txq->phys_addr, 3352 "physical_address_of the queue"); 3353 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen", 3354 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen, 3355 0, "txq generation"); 3356 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx", 3357 CTLFLAG_RD, &txq->cidx, 3358 0, "hardware queue cidx"); 3359 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx", 3360 CTLFLAG_RD, &txq->pidx, 3361 0, "hardware queue pidx"); 3362 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start", 3363 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start, 3364 0, "txq start idx for dump"); 3365 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count", 3366 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count, 3367 0, "txq #entries to dump"); 3368 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump", 3369 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH], 3370 0, t3_dump_txq, "A", "dump of the transmit queue"); 3371 } 3372 } 3373} 3374 3375/** 3376 * t3_get_desc - dump an SGE descriptor for debugging purposes 3377 * @qs: the queue set 3378 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx) 3379 * @idx: the descriptor index in the queue 3380 * @data: where to dump the descriptor contents 3381 * 3382 * Dumps the contents of a HW descriptor of an SGE queue. Returns the 3383 * size of the descriptor. 3384 */ 3385int 3386t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, 3387 unsigned char *data) 3388{ 3389 if (qnum >= 6) 3390 return (EINVAL); 3391 3392 if (qnum < 3) { 3393 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size) 3394 return -EINVAL; 3395 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc)); 3396 return sizeof(struct tx_desc); 3397 } 3398 3399 if (qnum == 3) { 3400 if (!qs->rspq.desc || idx >= qs->rspq.size) 3401 return (EINVAL); 3402 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc)); 3403 return sizeof(struct rsp_desc); 3404 } 3405 3406 qnum -= 4; 3407 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size) 3408 return (EINVAL); 3409 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc)); 3410 return sizeof(struct rx_desc); 3411} 3412