cxgb_sge.c revision 204348
/**************************************************************************

Copyright (c) 2007-2009, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_sge.c 204348 2010-02-26 07:08:44Z np $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/rman.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/systm.h>
#include <sys/syslog.h>
#include <sys/socket.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <cxgb_include.h>
#include <sys/mvec.h>

int	txq_fills = 0;
int	multiq_tx_enable = 1;

extern struct sysctl_oid_list sysctl__hw_cxgb_children;
int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
    "size of per-queue mbuf ring");

static int cxgb_tx_coalesce_force = 0;
TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW,
    &cxgb_tx_coalesce_force, 0,
    "coalesce small packets into a single work request regardless of ring state");

#define	COALESCE_START_DEFAULT	(TX_ETH_Q_SIZE >> 1)
#define	COALESCE_START_MAX	(TX_ETH_Q_SIZE - (TX_ETH_Q_SIZE >> 3))
#define	COALESCE_STOP_DEFAULT	(TX_ETH_Q_SIZE >> 2)
#define	COALESCE_STOP_MIN	(TX_ETH_Q_SIZE >> 5)
#define	TX_RECLAIM_DEFAULT	(TX_ETH_Q_SIZE >> 5)
#define	TX_RECLAIM_MAX		(TX_ETH_Q_SIZE >> 2)
#define	TX_RECLAIM_MIN		(TX_ETH_Q_SIZE >> 6)

static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start",
    &cxgb_tx_coalesce_enable_start);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW,
    &cxgb_tx_coalesce_enable_start, 0,
    "coalesce enable threshold");
static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW,
    &cxgb_tx_coalesce_enable_stop, 0,
    "coalesce disable threshold");
static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW,
    &cxgb_tx_reclaim_threshold, 0,
    "tx cleaning minimum threshold");

/*
 * XXX don't re-enable this until TOE stops assuming
 * we have an m_ext
 */
static int recycle_enable = 0;
int cxgb_ext_freed = 0;
int cxgb_ext_inited = 0;
int fl_q_size = 0;
int jumbo_q_size = 0;

extern int cxgb_use_16k_clusters;
extern int nmbjumbo4;
extern int nmbjumbo9;
extern int nmbjumbo16;

#define USE_GTS 0

#define SGE_RX_SM_BUF_SIZE	1536
#define SGE_RX_DROP_THRES	16
#define SGE_RX_COPY_THRES	128

/*
 * Period of the Tx buffer reclaim timer.  This timer does not need to run
 * frequently as Tx buffers are usually reclaimed by new Tx packets.
 */
#define TX_RECLAIM_PERIOD	(hz >> 1)

/*
 * Values for sge_txq.flags
 */
enum {
	TXQ_RUNNING	= 1 << 0,	/* fetch engine is running */
	TXQ_LAST_PKT_DB	= 1 << 1,	/* last packet rang the doorbell */
};

struct tx_desc {
	uint64_t	flit[TX_DESC_FLITS];
} __packed;

struct rx_desc {
	uint32_t	addr_lo;
	uint32_t	len_gen;
	uint32_t	gen2;
	uint32_t	addr_hi;
} __packed;

struct rsp_desc {	/* response queue descriptor */
	struct rss_header	rss_hdr;
	uint32_t		flags;
	uint32_t		len_cq;
	uint8_t			imm_data[47];
	uint8_t			intr_gen;
} __packed;

#define RX_SW_DESC_MAP_CREATED	(1 << 0)
#define TX_SW_DESC_MAP_CREATED	(1 << 1)
#define RX_SW_DESC_INUSE	(1 << 3)
#define TX_SW_DESC_MAPPED	(1 << 4)

#define RSPQ_NSOP_NEOP	G_RSPD_SOP_EOP(0)
#define RSPQ_EOP	G_RSPD_SOP_EOP(F_RSPD_EOP)
#define RSPQ_SOP	G_RSPD_SOP_EOP(F_RSPD_SOP)
#define RSPQ_SOP_EOP	G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)

struct tx_sw_desc {	/* SW state per Tx descriptor */
	struct mbuf	*m;
	bus_dmamap_t	map;
	int		flags;
};

struct rx_sw_desc {	/* SW state per Rx descriptor */
	caddr_t		rxsd_cl;
	struct mbuf	*m;
	bus_dmamap_t	map;
	int		flags;
};

struct txq_state {
	unsigned int	compl;
	unsigned int	gen;
	unsigned int	pidx;
};

struct refill_fl_cb_arg {
	int               error;
	bus_dma_segment_t seg;
	int               nseg;
};


/*
 * Maps a number of flits to the number of Tx descriptors that can hold them.
 * The formula is
 *
 *   desc = 1 + (flits - 2) / (WR_FLITS - 1).
 *
 * HW allows up to 4 descriptors to be combined into a WR.
 */
static uint8_t flit_desc_map[] = {
	0,
#if SGE_NUM_GENBITS == 1
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};
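/*
 * Worked example (illustrative; the table implies WR_FLITS == 15 when
 * SGE_NUM_GENBITS == 2, since one flit per descriptor is consumed by the
 * second generation bit): a WR of 16 flits needs
 * 1 + (16 - 2) / (15 - 1) == 2 descriptors, matching flit_desc_map[16] == 2.
 */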

#define	TXQ_LOCK_ASSERT(qs)	mtx_assert(&(qs)->lock, MA_OWNED)
#define	TXQ_TRYLOCK(qs)		mtx_trylock(&(qs)->lock)
#define	TXQ_LOCK(qs)		mtx_lock(&(qs)->lock)
#define	TXQ_UNLOCK(qs)		mtx_unlock(&(qs)->lock)
#define	TXQ_RING_EMPTY(qs)	drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_NEEDS_ENQUEUE(qs)					\
	drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_FLUSH(qs)	drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_DEQUEUE_COND(qs, func, arg)				\
	drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
#define	TXQ_RING_DEQUEUE(qs)						\
	drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)

int cxgb_debug = 0;

static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static void sge_txq_reclaim_handler(void *arg, int ncount);
static void cxgb_start_locked(struct sge_qset *qs);

/*
 * XXX need to cope with bursty scheduling by looking at a wider
 * window than we are now for determining the need for coalescing
 */
static __inline uint64_t
check_pkt_coalesce(struct sge_qset *qs)
{
	struct adapter *sc;
	struct sge_txq *txq;
	uint8_t *fill;

	if (__predict_false(cxgb_tx_coalesce_force))
		return (1);
	txq = &qs->txq[TXQ_ETH];
	sc = qs->port->adapter;
	fill = &sc->tunq_fill[qs->idx];

	if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
		cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
	if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
		cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
	/*
	 * Once the hardware transmit queue fills past the enable threshold
	 * we mark it as coalescing; we drop back from coalescing when it
	 * falls below the disable threshold and there are no packets
	 * enqueued.  This provides us with some degree of hysteresis.
	 */
	if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
	    TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
		*fill = 0;
	else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
		*fill = 1;

	return (sc->tunq_coalesce);
}

#ifdef __LP64__
static void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{
	uint64_t wr_hilo;
#if _BYTE_ORDER == _LITTLE_ENDIAN
	wr_hilo = wr_hi;
	wr_hilo |= (((uint64_t)wr_lo)<<32);
#else
	wr_hilo = wr_lo;
	wr_hilo |= (((uint64_t)wr_hi)<<32);
#endif
	wrp->wrh_hilo = wr_hilo;
}
#else
static void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{

	wrp->wrh_hi = wr_hi;
	wmb();
	wrp->wrh_lo = wr_lo;
}
#endif

struct coalesce_info {
	int count;
	int nbytes;
};

static int
coalesce_check(struct mbuf *m, void *arg)
{
	struct coalesce_info *ci = arg;
	int *count = &ci->count;
	int *nbytes = &ci->nbytes;

	if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) &&
	    (*count < 7) && (m->m_next == NULL))) {
		*count += 1;
		*nbytes += m->m_len;
		return (1);
	}
	return (0);
}

static struct mbuf *
cxgb_dequeue(struct sge_qset *qs)
{
	struct mbuf *m, *m_head, *m_tail;
	struct coalesce_info ci;


	if (check_pkt_coalesce(qs) == 0)
		return TXQ_RING_DEQUEUE(qs);

	m_head = m_tail = NULL;
	ci.count = ci.nbytes = 0;
	do {
		m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
		if (m_head == NULL) {
			m_tail = m_head = m;
		} else if (m != NULL) {
			m_tail->m_nextpkt = m;
			m_tail = m;
		}
	} while (m != NULL);
	if (ci.count > 7)
		panic("trying to coalesce %d packets in to one WR", ci.count);
	return (m_head);
}
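/*
 * Note (illustrative): coalesce_check admits at most 7 single-mbuf packets
 * totalling no more than 10500 bytes, presumably so the resulting chain
 * always fits in one CPL_TX_PKT_BATCH work request; the nsegs > 7 panic in
 * t3_encap() enforces the same per-WR limit.
 */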

/**
 *	reclaim_completed_tx - reclaims completed Tx descriptors
 *	@qs: the queue set owning the Tx queue
 *	@reclaim_min: don't bother reclaiming fewer descriptors than this
 *	@queue: the Tx queue to reclaim completed descriptors from
 *
 *	Reclaims Tx descriptors that the SGE has indicated it has processed,
 *	and frees the associated buffers if possible.  Called with the Tx
 *	queue's lock held.
 */
static __inline int
reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
{
	struct sge_txq *q = &qs->txq[queue];
	int reclaim = desc_reclaimable(q);

	if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
	    (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
		cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;

	if (reclaim < reclaim_min)
		return (0);

	mtx_assert(&qs->lock, MA_OWNED);
	if (reclaim > 0) {
		t3_free_tx_desc(qs, reclaim, queue);
		q->cleaned += reclaim;
		q->in_use -= reclaim;
	}
	if (isset(&qs->txq_stopped, TXQ_ETH))
		clrbit(&qs->txq_stopped, TXQ_ETH);

	return (reclaim);
}

/**
 *	should_restart_tx - are there enough resources to restart a Tx queue?
 *	@q: the Tx queue
 *
 *	Checks if there are enough descriptors to restart a suspended Tx queue.
 */
static __inline int
should_restart_tx(const struct sge_txq *q)
{
	unsigned int r = q->processed - q->cleaned;

	return q->in_use - r < (q->size >> 1);
}

/**
 *	t3_sge_init - initialize SGE
 *	@adap: the adapter
 *	@p: the SGE parameters
 *
 *	Performs SGE initialization needed every time after a chip reset.
 *	We do not initialize any of the queue sets here, instead the driver
 *	top-level must request those individually.  We also do not enable DMA
 *	here, that should be done after the queues have been set up.
 */
void
t3_sge_init(adapter_t *adap, struct sge_params *p)
{
	u_int ctrl, ups;

	ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */

	ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
	       F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
	       V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
	       V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
#if SGE_NUM_GENBITS == 1
	ctrl |= F_EGRGENCTRL;
#endif
	if (adap->params.rev > 0) {
		if (!(adap->flags & (USING_MSIX | USING_MSI)))
			ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
	}
	t3_write_reg(adap, A_SG_CONTROL, ctrl);
	t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
		     V_LORCQDRBTHRSH(512));
	t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
	t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
		     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
	t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
		     adap->params.rev < T3_REV_C ? 1000 : 500);
	t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
	t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
	t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
	t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
	t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
}


/**
 *	sgl_len - calculates the size of an SGL of the given capacity
 *	@n: the number of SGL entries
 *
 *	Calculates the number of flits needed for a scatter/gather list that
 *	can hold the given number of entries.
 */
static __inline unsigned int
sgl_len(unsigned int n)
{
	return ((3 * n) / 2 + (n & 1));
}
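/*
 * Worked example (illustrative): each sg_ent packs two 8-byte addresses and
 * two 4-byte lengths into 3 flits, so a 5-entry SGL needs
 * sgl_len(5) = (3 * 5) / 2 + (5 & 1) = 7 + 1 = 8 flits; the odd final
 * entry still occupies a full 2-flit address/length slot.
 */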

/**
 *	get_imm_packet - return the next ingress packet buffer from a response
 *	@resp: the response descriptor containing the packet data
 *
 *	Return a packet containing the immediate data of the given response.
 */
static int
get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
{

	m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
	m->m_ext.ext_buf = NULL;
	m->m_ext.ext_type = 0;
	memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
	return (0);
}

static __inline u_int
flits_to_desc(u_int n)
{
	return (flit_desc_map[n]);
}

#define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
		    F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
		    V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
		    F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
		    F_HIRCQPARITYERROR)
#define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
#define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
		      F_RSPQDISABLED)

/**
 *	t3_sge_err_intr_handler - SGE async event interrupt handler
 *	@adapter: the adapter
 *
 *	Interrupt handler for SGE asynchronous (non-data) events.
 */
void
t3_sge_err_intr_handler(adapter_t *adapter)
{
	unsigned int v, status;

	status = t3_read_reg(adapter, A_SG_INT_CAUSE);
	if (status & SGE_PARERR)
		CH_ALERT(adapter, "SGE parity error (0x%x)\n",
			 status & SGE_PARERR);
	if (status & SGE_FRAMINGERR)
		CH_ALERT(adapter, "SGE framing error (0x%x)\n",
			 status & SGE_FRAMINGERR);
	if (status & F_RSPQCREDITOVERFOW)
		CH_ALERT(adapter, "SGE response queue credit overflow\n");

	if (status & F_RSPQDISABLED) {
		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);

		CH_ALERT(adapter,
			 "packet delivered to disabled response queue (0x%x)\n",
			 (v >> S_RSPQ0DISABLED) & 0xff);
	}

	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
	if (status & SGE_FATALERR)
		t3_fatal_err(adapter);
}

void
t3_sge_prep(adapter_t *adap, struct sge_params *p)
{
	int i, nqsets;

	nqsets = min(SGE_QSETS, mp_ncpus*4);

	fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);

	while (!powerof2(fl_q_size))
		fl_q_size--;
#if __FreeBSD_version >= 700111
	if (cxgb_use_16k_clusters)
		jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
	else
		jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
#else
	jumbo_q_size = min(nmbjumbo4/(3*nqsets), JUMBO_Q_SIZE);
#endif
	while (!powerof2(jumbo_q_size))
		jumbo_q_size--;

	if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
		device_printf(adap->dev,
		    "Insufficient clusters and/or jumbo buffers.\n");

	/* XXX Does ETHER_ALIGN need to be accounted for here? */
	p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data);

	for (i = 0; i < SGE_QSETS; ++i) {
		struct qset_params *q = p->qset + i;

		if (adap->params.nports > 2) {
			q->coalesce_usecs = 50;
		} else {
#ifdef INVARIANTS
			q->coalesce_usecs = 10;
#else
			q->coalesce_usecs = 5;
#endif
		}
		q->polling = 0;
		q->rspq_size = RSPQ_Q_SIZE;
		q->fl_size = fl_q_size;
		q->jumbo_size = jumbo_q_size;
		q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
		q->txq_size[TXQ_OFLD] = 1024;
		q->txq_size[TXQ_CTRL] = 256;
		q->cong_thres = 0;
	}
}
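/*
 * Worked example (illustrative, with hypothetical values nmbclusters ==
 * 32768 and nqsets == 8): fl_q_size = min(32768 / 24, FL_Q_SIZE) = 1365,
 * which the power-of-2 loop above then trims down to 1024 entries.
 */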
int
t3_sge_alloc(adapter_t *sc)
{

	/* The parent tag. */
	if (bus_dma_tag_create( NULL,			/* parent */
				1, 0,			/* algnmnt, boundary */
				BUS_SPACE_MAXADDR,	/* lowaddr */
				BUS_SPACE_MAXADDR,	/* highaddr */
				NULL, NULL,		/* filter, filterarg */
				BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
				BUS_SPACE_UNRESTRICTED, /* nsegments */
				BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
				0,			/* flags */
				NULL, NULL,		/* lock, lockarg */
				&sc->parent_dmat)) {
		device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for normal sized RX frames
	 */
	if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
		MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
		device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for jumbo sized RX frames.
	 */
	if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
		BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
		device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for TX frames.
	 */
	if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
		TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
		NULL, NULL, &sc->tx_dmat)) {
		device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
		return (ENOMEM);
	}

	return (0);
}

int
t3_sge_free(struct adapter * sc)
{

	if (sc->tx_dmat != NULL)
		bus_dma_tag_destroy(sc->tx_dmat);

	if (sc->rx_jumbo_dmat != NULL)
		bus_dma_tag_destroy(sc->rx_jumbo_dmat);

	if (sc->rx_dmat != NULL)
		bus_dma_tag_destroy(sc->rx_dmat);

	if (sc->parent_dmat != NULL)
		bus_dma_tag_destroy(sc->parent_dmat);

	return (0);
}

void
t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
{

	qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
	qs->rspq.polling = 0 /* p->polling */;
}
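/*
 * Note (illustrative): t3_sge_init() programs A_SG_TIMER_TICK to
 * core_ticks_per_usec(adap) / 10, i.e. one SGE tick per 100ns, so the
 * holdoff_tmr above is coalesce_usecs expressed in 100ns units
 * (e.g. 5us -> 50 ticks).
 */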

#if !defined(__i386__) && !defined(__amd64__)
static void
refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct refill_fl_cb_arg *cb_arg = arg;

	cb_arg->error = error;
	cb_arg->seg = segs[0];
	cb_arg->nseg = nseg;

}
#endif
/**
 *	refill_fl - refill an SGE free-buffer list
 *	@sc: the controller softc
 *	@q: the free-list to refill
 *	@n: the number of new buffers to allocate
 *
 *	(Re)populate an SGE free-buffer list with up to @n new packet buffers.
 *	The caller must assure that @n does not exceed the queue's capacity.
 */
static void
refill_fl(adapter_t *sc, struct sge_fl *q, int n)
{
	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
	struct rx_desc *d = &q->desc[q->pidx];
	struct refill_fl_cb_arg cb_arg;
	struct mbuf *m;
	caddr_t cl;
	int err, count = 0;

	cb_arg.error = 0;
	while (n--) {
		/*
		 * We only allocate a cluster here; mbuf initialization
		 * happens after rx.
		 */
		if (q->zone == zone_pack) {
			if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
				break;
			cl = m->m_ext.ext_buf;
		} else {
			if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
				break;
			if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
				uma_zfree(q->zone, cl);
				break;
			}
		}
		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
				uma_zfree(q->zone, cl);
				goto done;
			}
			sd->flags |= RX_SW_DESC_MAP_CREATED;
		}
#if !defined(__i386__) && !defined(__amd64__)
		err = bus_dmamap_load(q->entry_tag, sd->map,
		    cl, q->buf_size, refill_fl_cb, &cb_arg, 0);

		if (err != 0 || cb_arg.error) {
			if (q->zone == zone_pack)
				uma_zfree(q->zone, cl);
			m_free(m);
			goto done;
		}
#else
		cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
#endif
		sd->flags |= RX_SW_DESC_INUSE;
		sd->rxsd_cl = cl;
		sd->m = m;
		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >> 32) & 0xffffffff);
		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
		d->gen2 = htobe32(V_FLD_GEN2(q->gen));

		d++;
		sd++;

		if (++q->pidx == q->size) {
			q->pidx = 0;
			q->gen ^= 1;
			sd = q->sdesc;
			d = q->desc;
		}
		q->credits++;
		count++;
	}

done:
	if (count)
		t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}


/**
 *	free_rx_bufs - free the Rx buffers on an SGE free list
 *	@sc: the controller softc
 *	@q: the SGE free list to clean up
 *
 *	Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
 *	this queue should be stopped before calling this function.
 */
static void
free_rx_bufs(adapter_t *sc, struct sge_fl *q)
{
	u_int cidx = q->cidx;

	while (q->credits--) {
		struct rx_sw_desc *d = &q->sdesc[cidx];

		if (d->flags & RX_SW_DESC_INUSE) {
			bus_dmamap_unload(q->entry_tag, d->map);
			bus_dmamap_destroy(q->entry_tag, d->map);
			if (q->zone == zone_pack) {
				m_init(d->m, zone_pack, MCLBYTES,
				    M_NOWAIT, MT_DATA, M_EXT);
				uma_zfree(zone_pack, d->m);
			} else {
				m_init(d->m, zone_mbuf, MLEN,
				    M_NOWAIT, MT_DATA, 0);
				uma_zfree(zone_mbuf, d->m);
				uma_zfree(q->zone, d->rxsd_cl);
			}
		}

		d->rxsd_cl = NULL;
		d->m = NULL;
		if (++cidx == q->size)
			cidx = 0;
	}
}

static __inline void
__refill_fl(adapter_t *adap, struct sge_fl *fl)
{
	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
}

static __inline void
__refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
{
	if ((fl->size - fl->credits) < max)
		refill_fl(adap, fl, min(max, fl->size - fl->credits));
}

/**
 *	recycle_rx_buf - recycle a receive buffer
 *	@adap: the adapter
 *	@q: the SGE free list
 *	@idx: index of buffer to recycle
 *
 *	Recycles the specified buffer on the given free list by adding it at
 *	the next available slot on the list.
 */
static void
recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
{
	struct rx_desc *from = &q->desc[idx];
	struct rx_desc *to   = &q->desc[q->pidx];

	q->sdesc[q->pidx] = q->sdesc[idx];
	to->addr_lo = from->addr_lo;	// already big endian
	to->addr_hi = from->addr_hi;	// likewise
	wmb();	/* necessary ? */
	to->len_gen = htobe32(V_FLD_GEN1(q->gen));
	to->gen2 = htobe32(V_FLD_GEN2(q->gen));
	q->credits++;

	if (++q->pidx == q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}

static void
alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
	uint32_t *addr;

	addr = arg;
	*addr = segs[0].ds_addr;
}

static int
alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
    bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
    bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
{
	size_t len = nelem * elem_size;
	void *s = NULL;
	void *p = NULL;
	int err;

	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
				      BUS_SPACE_MAXADDR_32BIT,
				      BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
				      len, 0, NULL, NULL, tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
		return (ENOMEM);
	}

	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
				    map)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
		return (ENOMEM);
	}

	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
	bzero(p, len);
	*(void **)desc = p;

	if (sw_size) {
		len = nelem * sw_size;
		s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
		*(void **)sdesc = s;
	}
	if (parent_entry_tag == NULL)
		return (0);

	if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
				      BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
				      NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
				      TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
				      NULL, NULL, entry_tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
		return (ENOMEM);
	}
	return (0);
}
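/*
 * Usage sketch (hedged; mirrors how the qset setup code elsewhere in this
 * driver calls it): a response ring with no per-entry SW state or DMA maps
 * passes sw_size == 0 and a NULL parent_entry_tag, e.g.
 *
 *	alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
 *	    &q->rspq.phys_addr, &q->rspq.desc, NULL,
 *	    &q->rspq.desc_tag, &q->rspq.desc_map, NULL, NULL);
 */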

static void
sge_slow_intr_handler(void *arg, int ncount)
{
	adapter_t *sc = arg;

	t3_slow_intr_handler(sc);
}

/**
 *	sge_timer_cb - perform periodic maintenance of an SGE qset
 *	@data: the SGE queue set to maintain
 *
 *	Runs periodically from a timer to perform maintenance of an SGE queue
 *	set.  It performs the following tasks:
 *
 *	a) Cleans up any completed Tx descriptors that may still be pending.
 *	Normal descriptor cleanup happens when new packets are added to a Tx
 *	queue so this timer is relatively infrequent and does any cleanup only
 *	if the Tx queue has not seen any new packets in a while.  We make a
 *	best effort attempt to reclaim descriptors, in that we don't wait
 *	around if we cannot get a queue's lock (which most likely is because
 *	someone else is queueing new packets and so will also handle the clean
 *	up).  Since control queues use immediate data exclusively we don't
 *	bother cleaning them up here.
 *
 *	b) Replenishes Rx queues that have run out due to memory shortage.
 *	Normally new Rx buffers are added when existing ones are consumed but
 *	when out of memory a queue can become empty.  We try to add only a few
 *	buffers here, the queue will be replenished fully as these new buffers
 *	are used up if memory shortage has subsided.
 *
 *	c) Return coalesced response queue credits in case a response queue is
 *	starved.
 *
 *	d) Ring doorbells for T304 tunnel queues since we have seen doorbell
 *	fifo overflows and the FW doesn't implement any recovery scheme yet.
 */
static void
sge_timer_cb(void *arg)
{
	adapter_t *sc = arg;

	if ((sc->flags & USING_MSIX) == 0) {

		struct port_info *pi;
		struct sge_qset *qs;
		struct sge_txq  *txq;
		int i, j;
		int reclaim_ofl, refill_rx;

		if (sc->open_device_map == 0)
			return;

		for (i = 0; i < sc->params.nports; i++) {
			pi = &sc->port[i];
			for (j = 0; j < pi->nqsets; j++) {
				qs = &sc->sge.qs[pi->first_qset + j];
				txq = &qs->txq[0];
				reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
				refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
				    (qs->fl[1].credits < qs->fl[1].size));
				if (reclaim_ofl || refill_rx) {
					taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
					break;
				}
			}
		}
	}

	if (sc->params.nports > 2) {
		int i;

		for_each_port(sc, i) {
			struct port_info *pi = &sc->port[i];

			t3_write_reg(sc, A_SG_KDOORBELL,
				     F_SELEGRCNTX |
				     (FW_TUNNEL_SGEEC_START + pi->first_qset));
		}
	}
	if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
	    sc->open_device_map != 0)
		callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
}

/*
 * This is meant to be a catch-all function to keep sge state private
 * to sge.c
 */
int
t3_sge_init_adapter(adapter_t *sc)
{
	callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
	TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
	return (0);
}

int
t3_sge_reset_adapter(adapter_t *sc)
{
	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
	return (0);
}

int
t3_sge_init_port(struct port_info *pi)
{
	TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
	return (0);
}

/**
 *	refill_rspq - replenish an SGE response queue
 *	@adapter: the adapter
 *	@q: the response queue to replenish
 *	@credits: how many new responses to make available
 *
 *	Replenishes a response queue by making the supplied number of responses
 *	available to HW.
 */
static __inline void
refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
{

	/* mbufs are allocated on demand when a rspq entry is processed. */
	t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
		     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
}

static void
sge_txq_reclaim_handler(void *arg, int ncount)
{
	struct sge_qset *qs = arg;
	int i;

	for (i = 0; i < 3; i++)
		reclaim_completed_tx(qs, 16, i);
}

static void
sge_timer_reclaim(void *arg, int ncount)
{
	struct port_info *pi = arg;
	int i, nqsets = pi->nqsets;
	adapter_t *sc = pi->adapter;
	struct sge_qset *qs;
	struct mtx *lock;

	KASSERT((sc->flags & USING_MSIX) == 0,
	    ("can't call timer reclaim for msi-x"));

	for (i = 0; i < nqsets; i++) {
		qs = &sc->sge.qs[pi->first_qset + i];

		reclaim_completed_tx(qs, 16, TXQ_OFLD);
		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
			    &sc->sge.qs[0].rspq.lock;

		if (mtx_trylock(lock)) {
			/* XXX currently assume that we are *NOT* polling */
			uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);

			if (qs->fl[0].credits < qs->fl[0].size - 16)
				__refill_fl(sc, &qs->fl[0]);
			if (qs->fl[1].credits < qs->fl[1].size - 16)
				__refill_fl(sc, &qs->fl[1]);

			if (status & (1 << qs->rspq.cntxt_id)) {
				if (qs->rspq.credits) {
					refill_rspq(sc, &qs->rspq, 1);
					qs->rspq.credits--;
					t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
					    1 << qs->rspq.cntxt_id);
				}
			}
			mtx_unlock(lock);
		}
	}
}

/**
 *	init_qset_cntxt - initialize an SGE queue set context info
 *	@qs: the queue set
 *	@id: the queue set id
 *
 *	Initializes the TIDs and context ids for the queues of a queue set.
 */
static void
init_qset_cntxt(struct sge_qset *qs, u_int id)
{

	qs->rspq.cntxt_id = id;
	qs->fl[0].cntxt_id = 2 * id;
	qs->fl[1].cntxt_id = 2 * id + 1;
	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;

	mbufq_init(&qs->txq[TXQ_ETH].sendq);
	mbufq_init(&qs->txq[TXQ_OFLD].sendq);
	mbufq_init(&qs->txq[TXQ_CTRL].sendq);
}


static void
txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
{
	txq->in_use += ndesc;
	/*
	 * XXX we don't handle stopping of queue
	 * presumably start handles this when we bump against the end
	 */
	txqs->gen = txq->gen;
	txq->unacked += ndesc;
	txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
	txq->unacked &= 31;
	txqs->pidx = txq->pidx;
	txq->pidx += ndesc;
#ifdef INVARIANTS
	if (((txqs->pidx > txq->cidx) &&
		(txq->pidx < txqs->pidx) &&
		(txq->pidx >= txq->cidx)) ||
	    ((txqs->pidx < txq->cidx) &&
		(txq->pidx >= txq->cidx)) ||
	    ((txqs->pidx < txq->cidx) &&
		(txq->cidx < txqs->pidx)))
		panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
		    txqs->pidx, txq->pidx, txq->cidx);
#endif
	if (txq->pidx >= txq->size) {
		txq->pidx -= txq->size;
		txq->gen ^= 1;
	}

}
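/*
 * Worked example (illustrative): unacked accumulates descriptors modulo 64.
 * Once 32 or more have built up, (unacked & 32) is nonzero and the shift by
 * (S_WR_COMPL - 5) moves that bit 5 to the S_WR_COMPL position, i.e. sets
 * F_WR_COMPL, so a completion is requested roughly every 32 descriptors.
 */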

/**
 *	calc_tx_descs - calculate the number of Tx descriptors for a packet
 *	@m: the packet mbufs
 *	@nsegs: the number of segments
 *
 *	Returns the number of Tx descriptors needed for the given Ethernet
 *	packet.  Ethernet packets require addition of WR and CPL headers.
 */
static __inline unsigned int
calc_tx_descs(const struct mbuf *m, int nsegs)
{
	unsigned int flits;

	if (m->m_pkthdr.len <= PIO_LEN)
		return 1;

	flits = sgl_len(nsegs) + 2;
	if (m->m_pkthdr.csum_flags & CSUM_TSO)
		flits++;

	return flits_to_desc(flits);
}
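/*
 * Worked example (illustrative): a non-TSO packet mapped into 5 DMA
 * segments needs sgl_len(5) + 2 = 10 flits (SGL plus the WR and CPL
 * headers), and flit_desc_map[10] == 1, so it fits in a single descriptor.
 */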

static unsigned int
busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
    struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
{
	struct mbuf *m0;
	int err, pktlen, pass = 0;
	bus_dma_tag_t tag = txq->entry_tag;

retry:
	err = 0;
	m0 = *m;
	pktlen = m0->m_pkthdr.len;
#if defined(__i386__) || defined(__amd64__)
	if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) {
		goto done;
	} else
#endif
		err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0);

	if (err == 0) {
		goto done;
	}
	if (err == EFBIG && pass == 0) {
		pass = 1;
		/* Too many segments, try to defrag */
		m0 = m_defrag(m0, M_DONTWAIT);
		if (m0 == NULL) {
			m_freem(*m);
			*m = NULL;
			return (ENOBUFS);
		}
		*m = m0;
		goto retry;
	} else if (err == ENOMEM) {
		return (err);
	} else if (err) {
		if (cxgb_debug)
			printf("map failure err=%d pktlen=%d\n", err, pktlen);
		m_freem(m0);
		*m = NULL;
		return (err);
	}
done:
#if !defined(__i386__) && !defined(__amd64__)
	bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE);
#endif
	txsd->flags |= TX_SW_DESC_MAPPED;

	return (0);
}

/**
 *	make_sgl - populate a scatter/gather list for a packet
 *	@sgp: the SGL to populate
 *	@segs: the packet dma segments
 *	@nsegs: the number of segments
 *
 *	Generates a scatter/gather list for the buffers that make up a packet.
 *	The caller must size the SGL appropriately; sgl_len() gives its size
 *	in 8-byte words.
 */
static __inline void
make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
{
	int i, idx;

	for (idx = 0, i = 0; i < nsegs; i++) {
		/*
		 * firmware doesn't like empty segments
		 */
		if (segs[i].ds_len == 0)
			continue;
		if (i && idx == 0)
			++sgp;

		sgp->len[idx] = htobe32(segs[i].ds_len);
		sgp->addr[idx] = htobe64(segs[i].ds_addr);
		idx ^= 1;
	}

	if (idx) {
		sgp->len[idx] = 0;
		sgp->addr[idx] = 0;
	}
}
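/*
 * Note (illustrative): each sg_ent holds a pair of (len, addr) slots, so
 * idx toggles between 0 and 1 and sgp advances once per completed pair; an
 * odd final entry gets its unused slot zeroed so the firmware never sees
 * stale data.
 */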

/**
 *	check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 *	@adap: the adapter
 *	@q: the Tx queue
 *
 *	Ring the doorbell if a Tx queue is asleep.  There is a natural race,
 *	where the HW is going to sleep just after we checked, however,
 *	then the interrupt handler will detect the outstanding TX packet
 *	and ring the doorbell for us.
 *
 *	When GTS is disabled we unconditionally ring the doorbell.
 */
static __inline void
check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
{
#if USE_GTS
	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
		set_bit(TXQ_LAST_PKT_DB, &q->flags);
#ifdef T3_TRACE
		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
			  q->cntxt_id);
#endif
		t3_write_reg(adap, A_SG_KDOORBELL,
			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	}
#else
	wmb();		/* write descriptors before telling HW */
	t3_write_reg(adap, A_SG_KDOORBELL,
		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
#endif
}

static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
#endif
}

/**
 *	write_wr_hdr_sgl - write a WR header and, optionally, SGL
 *	@ndesc: number of Tx descriptors spanned by the SGL
 *	@txd: first Tx descriptor to be written
 *	@txqs: txq state (generation and producer index)
 *	@txq: the SGE Tx queue
 *	@sgl: the SGL
 *	@flits: number of flits to the start of the SGL in the first descriptor
 *	@sgl_flits: the SGL size in flits
 *	@wr_hi: top 32 bits of WR header based on WR type (big endian)
 *	@wr_lo: low 32 bits of WR header based on WR type (big endian)
 *
 *	Write a work request header and an associated SGL.  If the SGL is
 *	small enough to fit into one Tx descriptor it has already been written
 *	and we just need to write the WR header.  Otherwise we distribute the
 *	SGL across the number of descriptors it spans.
 */
static void
write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
    const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
    unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
{

	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
	struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];

	if (__predict_true(ndesc == 1)) {
		set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi,
		    htonl(V_WR_LEN(flits + sgl_flits) |
		    V_WR_GEN(txqs->gen)) | wr_lo);
		/* XXX gen? */
		wr_gen2(txd, txqs->gen);

	} else {
		unsigned int ogen = txqs->gen;
		const uint64_t *fp = (const uint64_t *)sgl;
		struct work_request_hdr *wp = wrp;

		wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi;

		while (sgl_flits) {
			unsigned int avail = WR_FLITS - flits;

			if (avail > sgl_flits)
				avail = sgl_flits;
			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
			sgl_flits -= avail;
			ndesc--;
			if (!sgl_flits)
				break;

			fp += avail;
			txd++;
			txsd++;
			if (++txqs->pidx == txq->size) {
				txqs->pidx = 0;
				txqs->gen ^= 1;
				txd = txq->desc;
				txsd = txq->sdesc;
			}

			/*
			 * when the head of the mbuf chain
			 * is freed all clusters will be freed
			 * with it
			 */
			wrp = (struct work_request_hdr *)txd;
			wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
			    V_WR_SGLSFLT(1)) | wr_hi;
			wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
				    sgl_flits + 1)) |
			    V_WR_GEN(txqs->gen)) | wr_lo;
			wr_gen2(txd, txqs->gen);
			flits = 1;
		}
		wrp->wrh_hi |= htonl(F_WR_EOP);
		wmb();
		wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
		wr_gen2((struct tx_desc *)wp, ogen);
	}
}
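/*
 * Layout sketch (illustrative): a WR spanning 3 descriptors carries
 * F_WR_SOP plus the first SGL flits in descriptor 0, a continuation header
 * (V_WR_SGLSFLT(1)) plus more SGL flits in descriptor 1, and the remainder
 * plus F_WR_EOP in descriptor 2.  The first descriptor's wrh_lo, which
 * carries the original generation bit, is written last (after the wmb())
 * so the SGE cannot start fetching a half-built WR.
 */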

/* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */
#define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20)

#define GET_VTAG(cntrl, m) \
do { \
	if ((m)->m_flags & M_VLANTAG)					            \
		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
} while (0)

static int
t3_encap(struct sge_qset *qs, struct mbuf **m)
{
	adapter_t *sc;
	struct mbuf *m0;
	struct sge_txq *txq;
	struct txq_state txqs;
	struct port_info *pi;
	unsigned int ndesc, flits, cntrl, mlen;
	int err, nsegs, tso_info = 0;

	struct work_request_hdr *wrp;
	struct tx_sw_desc *txsd;
	struct sg_ent *sgp, *sgl;
	uint32_t wr_hi, wr_lo, sgl_flits;
	bus_dma_segment_t segs[TX_MAX_SEGS];

	struct tx_desc *txd;

	pi = qs->port;
	sc = pi->adapter;
	txq = &qs->txq[TXQ_ETH];
	txd = &txq->desc[txq->pidx];
	txsd = &txq->sdesc[txq->pidx];
	sgl = txq->txq_sgl;

	prefetch(txd);
	m0 = *m;

	mtx_assert(&qs->lock, MA_OWNED);
	cntrl = V_TXPKT_INTF(pi->txpkt_intf);
	KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));

	if (m0->m_nextpkt == NULL && m0->m_next != NULL &&
	    m0->m_pkthdr.csum_flags & (CSUM_TSO))
		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);

	if (m0->m_nextpkt != NULL) {
		busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
		ndesc = 1;
		mlen = 0;
	} else {
		if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
		    &m0, segs, &nsegs))) {
			if (cxgb_debug)
				printf("failed ... err=%d\n", err);
			return (err);
		}
		mlen = m0->m_pkthdr.len;
		ndesc = calc_tx_descs(m0, nsegs);
	}
	txq_prod(txq, ndesc, &txqs);

	KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
	txsd->m = m0;

	if (m0->m_nextpkt != NULL) {
		struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
		int i, fidx;

		if (nsegs > 7)
			panic("trying to coalesce %d packets in to one WR", nsegs);
		txq->txq_coalesced += nsegs;
		wrp = (struct work_request_hdr *)txd;
		flits = nsegs*2 + 1;

		for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
			struct cpl_tx_pkt_batch_entry *cbe;
			uint64_t flit;
			uint32_t *hflit = (uint32_t *)&flit;
			int cflags = m0->m_pkthdr.csum_flags;

			cntrl = V_TXPKT_INTF(pi->txpkt_intf);
			GET_VTAG(cntrl, m0);
			cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
			if (__predict_false(!(cflags & CSUM_IP)))
				cntrl |= F_TXPKT_IPCSUM_DIS;
			if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP))))
				cntrl |= F_TXPKT_L4CSUM_DIS;

			hflit[0] = htonl(cntrl);
			hflit[1] = htonl(segs[i].ds_len | 0x80000000);
			flit |= htobe64(1 << 24);
			cbe = &cpl_batch->pkt_entry[i];
			cbe->cntrl = hflit[0];
			cbe->len = hflit[1];
			cbe->addr = htobe64(segs[i].ds_addr);
		}

		wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) |
		    htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
		wr_lo = htonl(V_WR_LEN(flits) |
		    V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
		set_wr_hdr(wrp, wr_hi, wr_lo);
		wmb();
		ETHER_BPF_MTAP(pi->ifp, m0);
		wr_gen2(txd, txqs.gen);
		check_ring_tx_db(sc, txq);
		return (0);
	} else if (tso_info) {
		int eth_type;
		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
		struct ether_header *eh;
		struct ip *ip;
		struct tcphdr *tcp;

		txd->flit[2] = 0;
		GET_VTAG(cntrl, m0);
		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
		hdr->cntrl = htonl(cntrl);
		hdr->len = htonl(mlen | 0x80000000);

		if (__predict_false(mlen < TCPPKTHDRSIZE)) {
			printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
			    m0, mlen, m0->m_pkthdr.tso_segsz,
			    m0->m_pkthdr.csum_flags, m0->m_flags);
			panic("tx tso packet too small");
		}

		/* Make sure that ether, ip, tcp headers are all in m0 */
		if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
			m0 = m_pullup(m0, TCPPKTHDRSIZE);
			if (__predict_false(m0 == NULL)) {
				/* XXX panic probably an overreaction */
				panic("couldn't fit header into mbuf");
			}
		}

		eh = mtod(m0, struct ether_header *);
		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
			eth_type = CPL_ETH_II_VLAN;
			ip = (struct ip *)((struct ether_vlan_header *)eh + 1);
		} else {
			eth_type = CPL_ETH_II;
			ip = (struct ip *)(eh + 1);
		}
		tcp = (struct tcphdr *)(ip + 1);

		tso_info |= V_LSO_ETH_TYPE(eth_type) |
			    V_LSO_IPHDR_WORDS(ip->ip_hl) |
			    V_LSO_TCPHDR_WORDS(tcp->th_off);
		hdr->lso_info = htonl(tso_info);

		if (__predict_false(mlen <= PIO_LEN)) {
			/*
			 * pkt not undersized but fits in PIO_LEN
			 * Indicates a TSO bug at the higher levels.
			 */
			txsd->m = NULL;
			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
			flits = (mlen + 7) / 8 + 3;
			wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
			    V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
			    F_WR_SOP | F_WR_EOP | txqs.compl);
			wr_lo = htonl(V_WR_LEN(flits) |
			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
			set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
			wmb();
			ETHER_BPF_MTAP(pi->ifp, m0);
			wr_gen2(txd, txqs.gen);
			check_ring_tx_db(sc, txq);
			m_freem(m0);
			return (0);
		}
		flits = 3;
	} else {
		struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;

		GET_VTAG(cntrl, m0);
		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
		if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
			cntrl |= F_TXPKT_IPCSUM_DIS;
		if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
			cntrl |= F_TXPKT_L4CSUM_DIS;
		cpl->cntrl = htonl(cntrl);
		cpl->len = htonl(mlen | 0x80000000);

		if (mlen <= PIO_LEN) {
			txsd->m = NULL;
			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
			flits = (mlen + 7) / 8 + 2;

			wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
			    V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
			    F_WR_SOP | F_WR_EOP | txqs.compl);
			wr_lo = htonl(V_WR_LEN(flits) |
			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
			set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
			wmb();
			ETHER_BPF_MTAP(pi->ifp, m0);
			wr_gen2(txd, txqs.gen);
			check_ring_tx_db(sc, txq);
			m_freem(m0);
			return (0);
		}
		flits = 2;
	}
	wrp = (struct work_request_hdr *)txd;
	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
	make_sgl(sgp, segs, nsegs);

	sgl_flits = sgl_len(nsegs);

	ETHER_BPF_MTAP(pi->ifp, m0);

	KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
	wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
	wr_lo = htonl(V_WR_TID(txq->token));
	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
	    sgl_flits, wr_hi, wr_lo);
	check_ring_tx_db(sc, txq);

	return (0);
}
1526 */ 1527 txsd->m = NULL; 1528 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]); 1529 flits = (mlen + 7) / 8 + 3; 1530 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1531 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1532 F_WR_SOP | F_WR_EOP | txqs.compl); 1533 wr_lo = htonl(V_WR_LEN(flits) | 1534 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1535 set_wr_hdr(&hdr->wr, wr_hi, wr_lo); 1536 wmb(); 1537 ETHER_BPF_MTAP(pi->ifp, m0); 1538 wr_gen2(txd, txqs.gen); 1539 check_ring_tx_db(sc, txq); 1540 m_freem(m0); 1541 return (0); 1542 } 1543 flits = 3; 1544 } else { 1545 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd; 1546 1547 GET_VTAG(cntrl, m0); 1548 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1549 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP))) 1550 cntrl |= F_TXPKT_IPCSUM_DIS; 1551 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)))) 1552 cntrl |= F_TXPKT_L4CSUM_DIS; 1553 cpl->cntrl = htonl(cntrl); 1554 cpl->len = htonl(mlen | 0x80000000); 1555 1556 if (mlen <= PIO_LEN) { 1557 txsd->m = NULL; 1558 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); 1559 flits = (mlen + 7) / 8 + 2; 1560 1561 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1562 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1563 F_WR_SOP | F_WR_EOP | txqs.compl); 1564 wr_lo = htonl(V_WR_LEN(flits) | 1565 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1566 set_wr_hdr(&cpl->wr, wr_hi, wr_lo); 1567 wmb(); 1568 ETHER_BPF_MTAP(pi->ifp, m0); 1569 wr_gen2(txd, txqs.gen); 1570 check_ring_tx_db(sc, txq); 1571 m_freem(m0); 1572 return (0); 1573 } 1574 flits = 2; 1575 } 1576 wrp = (struct work_request_hdr *)txd; 1577 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl; 1578 make_sgl(sgp, segs, nsegs); 1579 1580 sgl_flits = sgl_len(nsegs); 1581 1582 ETHER_BPF_MTAP(pi->ifp, m0); 1583 1584 KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc)); 1585 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1586 wr_lo = htonl(V_WR_TID(txq->token)); 1587 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, 1588 sgl_flits, wr_hi, wr_lo); 1589 check_ring_tx_db(sc, txq); 1590 1591 return (0); 1592} 1593 1594void 1595cxgb_tx_watchdog(void *arg) 1596{ 1597 struct sge_qset *qs = arg; 1598 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1599 1600 if (qs->coalescing != 0 && 1601 (txq->in_use <= cxgb_tx_coalesce_enable_stop) && 1602 TXQ_RING_EMPTY(qs)) 1603 qs->coalescing = 0; 1604 else if (qs->coalescing == 0 && 1605 (txq->in_use >= cxgb_tx_coalesce_enable_start)) 1606 qs->coalescing = 1; 1607 if (TXQ_TRYLOCK(qs)) { 1608 qs->qs_flags |= QS_FLUSHING; 1609 cxgb_start_locked(qs); 1610 qs->qs_flags &= ~QS_FLUSHING; 1611 TXQ_UNLOCK(qs); 1612 } 1613 if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING) 1614 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog, 1615 qs, txq->txq_watchdog.c_cpu); 1616} 1617 1618static void 1619cxgb_tx_timeout(void *arg) 1620{ 1621 struct sge_qset *qs = arg; 1622 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1623 1624 if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3))) 1625 qs->coalescing = 1; 1626 if (TXQ_TRYLOCK(qs)) { 1627 qs->qs_flags |= QS_TIMEOUT; 1628 cxgb_start_locked(qs); 1629 qs->qs_flags &= ~QS_TIMEOUT; 1630 TXQ_UNLOCK(qs); 1631 } 1632} 1633 1634static void 1635cxgb_start_locked(struct sge_qset *qs) 1636{ 1637 struct mbuf *m_head = NULL; 1638 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1639 int avail, txmax; 1640 int in_use_init = txq->in_use; 1641 struct port_info *pi = qs->port; 1642 struct ifnet *ifp = pi->ifp; 1643 avail = txq->size - txq->in_use - 4; 1644 txmax = min(TX_START_MAX_DESC, avail); 1645 1646 if 

static int
cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
{
	struct port_info *pi = qs->port;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];
	struct buf_ring *br = txq->txq_mr;
	int error, avail;

	avail = txq->size - txq->in_use;
	TXQ_LOCK_ASSERT(qs);

	/*
	 * We can only do a direct transmit if the following are true:
	 * - we aren't coalescing (ring < 3/4 full)
	 * - the link is up -- checked in caller
	 * - there are no packets enqueued already
	 * - there is space in hardware transmit queue
	 */
	if (check_pkt_coalesce(qs) == 0 &&
	    !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > 4) {
		if (t3_encap(qs, &m)) {
			if (m != NULL &&
			    (error = drbr_enqueue(ifp, br, m)) != 0)
				return (error);
		} else {
			/*
			 * We've bypassed the buf ring so we need to update
			 * the stats directly
			 */
			txq->txq_direct_packets++;
			txq->txq_direct_bytes += m->m_pkthdr.len;
		}
	} else if ((error = drbr_enqueue(ifp, br, m)) != 0)
		return (error);

	reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
	if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
	    (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
		cxgb_start_locked(qs);
	else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
		callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
		    qs, txq->txq_timer.c_cpu);
	return (0);
}

int
cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct sge_qset *qs;
	struct port_info *pi = ifp->if_softc;
	int error, qidx = pi->first_qset;

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0
	    || (!pi->link_config.link_ok)) {
		m_freem(m);
		return (0);
	}

	if (m->m_flags & M_FLOWID)
		qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;

	qs = &pi->adapter->sge.qs[qidx];

	if (TXQ_TRYLOCK(qs)) {
		/* XXX running */
		error = cxgb_transmit_locked(ifp, qs, m);
		TXQ_UNLOCK(qs);
	} else
		error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);

	return (error);
}
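/*
 * Worked example (illustrative): with first_qset == 4 and nqsets == 4, a
 * flow-tagged mbuf with flowid == 10 maps to qidx = (10 % 4) + 4 == 6, so
 * all packets of a given flow land on the same queue set.
 */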

void
cxgb_start(struct ifnet *ifp)
{
	struct port_info *pi = ifp->if_softc;
	struct sge_qset *qs = &pi->adapter->sge.qs[pi->first_qset];

	if (!pi->link_config.link_ok)
		return;

	TXQ_LOCK(qs);
	cxgb_start_locked(qs);
	TXQ_UNLOCK(qs);
}

void
cxgb_qflush(struct ifnet *ifp)
{
	/*
	 * flush any enqueued mbufs in the buf_rings
	 * and in the transmit queues
	 * no-op for now
	 */
	return;
}

/**
 *	write_imm - write a packet into a Tx descriptor as immediate data
 *	@d: the Tx descriptor to write
 *	@m: the packet
 *	@len: the length of packet data to write as immediate data
 *	@gen: the generation bit value to write
 *
 *	Writes a packet as immediate data into a Tx descriptor.  The packet
 *	contains a work request at its beginning.  We must write the packet
 *	carefully so the SGE doesn't read accidentally before it's written in
 *	its entirety.
 */
static __inline void
write_imm(struct tx_desc *d, struct mbuf *m,
	  unsigned int len, unsigned int gen)
{
	struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
	struct work_request_hdr *to = (struct work_request_hdr *)d;
	uint32_t wr_hi, wr_lo;

	if (len > WR_LEN)
		panic("len too big %d\n", len);
	if (len < sizeof(*from))
		panic("len too small %d", len);

	memcpy(&to[1], &from[1], len - sizeof(*from));
	wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
	    V_WR_BCNTLFLT(len & 7));
	wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) |
	    V_WR_LEN((len + 7) / 8));
	set_wr_hdr(to, wr_hi, wr_lo);
	wmb();
	wr_gen2(d, gen);

	/*
	 * This check is a hack; we should really fix the logic so
	 * that this can't happen.
	 */
	if (m->m_type != MT_DONTFREE)
		m_freem(m);
}

/**
 *	check_desc_avail - check descriptor availability on a send queue
 *	@adap: the adapter
 *	@q: the TX queue
 *	@m: the packet needing the descriptors
 *	@ndesc: the number of Tx descriptors needed
 *	@qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 *
 *	Checks if the requested number of Tx descriptors is available on an
 *	SGE send queue.  If the queue is already suspended or not enough
 *	descriptors are available the packet is queued for later transmission.
 *	Must be called with the Tx queue locked.
 *
 *	Returns 0 if enough descriptors are available, 1 if there aren't
 *	enough descriptors and the packet has been queued, and 2 if the caller
 *	needs to retry because there weren't enough descriptors at the
 *	beginning of the call but some freed up in the meantime.
 */
static __inline int
check_desc_avail(adapter_t *adap, struct sge_txq *q,
		 struct mbuf *m, unsigned int ndesc,
		 unsigned int qid)
{
	/*
	 * XXX We currently only use this for checking the control queue;
	 * the control queue is only used for binding qsets, which happens
	 * at init time, so we are guaranteed enough descriptors.
	 */
	if (__predict_false(!mbufq_empty(&q->sendq))) {
addq_exit:	mbufq_tail(&q->sendq, m);
		return 1;
	}
	if (__predict_false(q->size - q->in_use < ndesc)) {

		struct sge_qset *qs = txq_to_qset(q, qid);

		setbit(&qs->txq_stopped, qid);
		if (should_restart_tx(q) &&
		    test_and_clear_bit(qid, &qs->txq_stopped))
			return 2;

		q->stops++;
		goto addq_exit;
	}
	return 0;
}
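/*
 * Usage sketch (hedged): callers loop on the return value, as ctrl_xmit()
 * below does --
 *
 *	again:	reclaim_completed_tx_imm(q);
 *		ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
 *		if (ret == 1)
 *			return (ENOSPC);	// packet queued for later
 *		else if (ret == 2)
 *			goto again;		// descriptors freed up, retry
 */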
/**
 *	ctrl_xmit - send a packet through an SGE control Tx queue
 *	@adap: the adapter
 *	@qs: the queue set containing the control queue
 *	@m: the packet
 *
 *	Send a packet through an SGE control Tx queue.  Packets sent through
 *	a control queue must fit entirely as immediate data in a single Tx
 *	descriptor and have no page fragments.
 */
static int
ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
{
	int ret;
	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
	struct sge_txq *q = &qs->txq[TXQ_CTRL];

	if (__predict_false(!immediate(m))) {
		m_freem(m);
		return 0;
	}

	wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
	wrp->wrh_lo = htonl(V_WR_TID(q->token));

	TXQ_LOCK(qs);
again:	reclaim_completed_tx_imm(q);

	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
	if (__predict_false(ret)) {
		if (ret == 1) {
			TXQ_UNLOCK(qs);
			return (ENOSPC);
		}
		goto again;
	}
	write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

	q->in_use++;
	if (++q->pidx >= q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	TXQ_UNLOCK(qs);
	wmb();
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	return (0);
}


/**
 *	restart_ctrlq - restart a suspended control queue
 *	@qs: the queue set containing the control queue
 *
 *	Resumes transmission on a suspended Tx control queue.
 */
static void
restart_ctrlq(void *data, int npending)
{
	struct mbuf *m;
	struct sge_qset *qs = (struct sge_qset *)data;
	struct sge_txq *q = &qs->txq[TXQ_CTRL];
	adapter_t *adap = qs->port->adapter;

	TXQ_LOCK(qs);
again:	reclaim_completed_tx_imm(q);

	while (q->in_use < q->size &&
	       (m = mbufq_dequeue(&q->sendq)) != NULL) {

		write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

		if (++q->pidx >= q->size) {
			q->pidx = 0;
			q->gen ^= 1;
		}
		q->in_use++;
	}
	if (!mbufq_empty(&q->sendq)) {
		setbit(&qs->txq_stopped, TXQ_CTRL);

		if (should_restart_tx(q) &&
		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
			goto again;
		q->stops++;
	}
	TXQ_UNLOCK(qs);
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}


/*
 * Send a management message through control queue 0.
 */
int
t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
{
	return ctrl_xmit(adap, &adap->sge.qs[0], m);
}
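/*
 * How suspended queues come back to life: a sender that runs out of
 * descriptors sets its queue's bit in qs->txq_stopped; once enough credits
 * have been returned, restart_tx() (further below) enqueues the queue's
 * qresume_task, which t3_sge_alloc_qset() wires to restart_ctrlq() or
 * restart_offloadq().  The task then drains q->sendq and rings the
 * doorbell again.
 */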
/**
 *	t3_free_qset - free the resources of an SGE queue set
 *	@sc: the controller owning the queue set
 *	@q: the queue set
 *
 *	Release the HW and SW resources associated with an SGE queue set, such
 *	as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 *	queue set must be quiesced prior to calling this.
 */
static void
t3_free_qset(adapter_t *sc, struct sge_qset *q)
{
	int i;

	reclaim_completed_tx(q, 0, TXQ_ETH);
	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
		if (q->txq[i].txq_mr != NULL)
			buf_ring_free(q->txq[i].txq_mr, M_DEVBUF);
		if (q->txq[i].txq_ifq != NULL) {
			ifq_delete(q->txq[i].txq_ifq);
			free(q->txq[i].txq_ifq, M_DEVBUF);
		}
	}

	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
		if (q->fl[i].desc) {
			mtx_lock_spin(&sc->sge.reg_lock);
			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
			mtx_unlock_spin(&sc->sge.reg_lock);
			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
					q->fl[i].desc_map);
			bus_dma_tag_destroy(q->fl[i].desc_tag);
			bus_dma_tag_destroy(q->fl[i].entry_tag);
		}
		if (q->fl[i].sdesc) {
			free_rx_bufs(sc, &q->fl[i]);
			free(q->fl[i].sdesc, M_DEVBUF);
		}
	}

	mtx_unlock(&q->lock);
	MTX_DESTROY(&q->lock);
	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
		if (q->txq[i].desc) {
			mtx_lock_spin(&sc->sge.reg_lock);
			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
			mtx_unlock_spin(&sc->sge.reg_lock);
			bus_dmamap_unload(q->txq[i].desc_tag,
					q->txq[i].desc_map);
			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
					q->txq[i].desc_map);
			bus_dma_tag_destroy(q->txq[i].desc_tag);
			bus_dma_tag_destroy(q->txq[i].entry_tag);
		}
		if (q->txq[i].sdesc) {
			free(q->txq[i].sdesc, M_DEVBUF);
		}
	}

	if (q->rspq.desc) {
		mtx_lock_spin(&sc->sge.reg_lock);
		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
		mtx_unlock_spin(&sc->sge.reg_lock);

		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
				q->rspq.desc_map);
		bus_dma_tag_destroy(q->rspq.desc_tag);
		MTX_DESTROY(&q->rspq.lock);
	}

	tcp_lro_free(&q->lro.ctrl);

	bzero(q, sizeof(*q));
}

/**
 *	t3_free_sge_resources - free SGE resources
 *	@sc: the adapter softc
 *
 *	Frees resources used by the SGE queue sets.
 */
void
t3_free_sge_resources(adapter_t *sc)
{
	int i, nqsets;

	for (nqsets = i = 0; i < (sc)->params.nports; i++)
		nqsets += sc->port[i].nqsets;

	for (i = 0; i < nqsets; ++i) {
		TXQ_LOCK(&sc->sge.qs[i]);
		t3_free_qset(sc, &sc->sge.qs[i]);
	}
}

/**
 *	t3_sge_start - enable SGE
 *	@sc: the controller softc
 *
 *	Enables the SGE for DMAs.  This is the last step in starting packet
 *	transfers.
 */
void
t3_sge_start(adapter_t *sc)
{
	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
}

/**
 *	t3_sge_stop - disable SGE operation
 *	@sc: the adapter
 *
 *	Disables the DMA engine.  This can be called in emergencies (e.g.,
 *	from error interrupts) or from normal process context.  In the latter
 *	case it also disables any pending queue restart tasklets.  Note that
 *	if it is called in interrupt context it cannot disable the restart
 *	tasklets as it cannot wait; however, the tasklets will have no effect
 *	since the doorbells are disabled.  The driver will call this again
 *	later from process context, at which time the tasklets will be stopped
 *	if they are still running.
 */
void
t3_sge_stop(adapter_t *sc)
{
	int i, nqsets;

	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);

	if (sc->tq == NULL)
		return;

	for (nqsets = i = 0; i < (sc)->params.nports; i++)
		nqsets += sc->port[i].nqsets;
#ifdef notyet
	/*
	 * XXX This is where the restart tasks would be drained (see the
	 * function comment above); not yet enabled.
	 */
	for (i = 0; i < nqsets; ++i) {
		struct sge_qset *qs = &sc->sge.qs[i];

		taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
		taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
	}
#endif
}
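/*
 * Descriptor accounting in brief: completion credits reported by the
 * hardware (see handle_rsp_cntrl_info() further below) advance
 * q->processed; reclaiming advances q->cleaned up to q->processed and
 * gives the space back to q->in_use:
 *
 *	reclaimable = q->processed - q->cleaned;
 *	q->in_use  -= reclaimable;
 *	q->cleaned += reclaimable;
 */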
/**
 *	t3_free_tx_desc - reclaims Tx descriptors and their buffers
 *	@qs: the queue set whose Tx queue is to be reclaimed
 *	@reclaimable: the number of descriptors to reclaim
 *	@queue: the Tx queue index within the queue set (e.g. TXQ_ETH)
 *
 *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 *	Tx buffers.  Called with the Tx queue lock held.
 */
void
t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
{
	struct tx_sw_desc *txsd;
	unsigned int cidx, mask;
	struct sge_txq *q = &qs->txq[queue];

#ifdef T3_TRACE
	T3_TRACE2(sc->tb[q->cntxt_id & 7],
	    "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
#endif
	cidx = q->cidx;
	mask = q->size - 1;
	txsd = &q->sdesc[cidx];

	mtx_assert(&qs->lock, MA_OWNED);
	while (reclaimable--) {
		prefetch(q->sdesc[(cidx + 1) & mask].m);
		prefetch(q->sdesc[(cidx + 2) & mask].m);

		if (txsd->m != NULL) {
			if (txsd->flags & TX_SW_DESC_MAPPED) {
				bus_dmamap_unload(q->entry_tag, txsd->map);
				txsd->flags &= ~TX_SW_DESC_MAPPED;
			}
			m_freem_list(txsd->m);
			txsd->m = NULL;
		} else
			q->txq_skipped++;

		++txsd;
		if (++cidx == q->size) {
			cidx = 0;
			txsd = q->sdesc;
		}
	}
	q->cidx = cidx;
}

/**
 *	is_new_response - check if a response is newly written
 *	@r: the response descriptor
 *	@q: the response queue
 *
 *	Returns true if a response descriptor contains a yet unprocessed
 *	response.
 */
static __inline int
is_new_response(const struct rsp_desc *r,
    const struct sge_rspq *q)
{
	return (r->intr_gen & F_RSPD_GEN2) == q->gen;
}

#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
			V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))

/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
#define NOMEM_INTR_DELAY 2500
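/*
 * The generation-bit handshake used by is_new_response(): the driver flips
 * its notion of the generation every time the ring wraps, so a descriptor
 * is "new" only while its F_RSPD_GEN2 bit matches q->gen.  The consumer
 * side of the handshake appears in process_responses():
 *
 *	if (++rspq->cidx == rspq->size) {
 *		rspq->cidx = 0;
 *		rspq->gen ^= 1;
 *	}
 */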
/**
 *	write_ofld_wr - write an offload work request
 *	@adap: the adapter
 *	@m: the packet to send
 *	@q: the Tx queue
 *	@pidx: index of the first Tx descriptor to write
 *	@gen: the generation value to use
 *	@ndesc: number of descriptors the packet will occupy
 *
 *	Write an offload work request to send the supplied packet.  The packet
 *	data already carry the work request with most fields populated.
 */
static void
write_ofld_wr(adapter_t *adap, struct mbuf *m,
    struct sge_txq *q, unsigned int pidx,
    unsigned int gen, unsigned int ndesc,
    bus_dma_segment_t *segs, unsigned int nsegs)
{
	unsigned int sgl_flits, flits;
	struct work_request_hdr *from;
	struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
	struct tx_desc *d = &q->desc[pidx];
	struct txq_state txqs;

	if (immediate(m) && nsegs == 0) {
		write_imm(d, m, m->m_len, gen);
		return;
	}

	/* Only TX_DATA builds SGLs */
	from = mtod(m, struct work_request_hdr *);
	memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));

	flits = m->m_len / 8;
	sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;

	make_sgl(sgp, segs, nsegs);
	sgl_flits = sgl_len(nsegs);

	txqs.gen = gen;
	txqs.pidx = pidx;
	txqs.compl = 0;

	write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
	    from->wrh_hi, from->wrh_lo);
}

/**
 *	calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
 *	@m: the packet
 *	@nsegs: the number of DMA segments backing the packet
 *
 *	Returns the number of Tx descriptors needed for the given offload
 *	packet.  These packets are already fully constructed.
 */
static __inline unsigned int
calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
{
	unsigned int flits, cnt = 0;
	int ndescs;

	if (m->m_len <= WR_LEN && nsegs == 0)
		return (1);	/* packet fits as immediate data */

	/*
	 * This needs to be re-visited for TOE.
	 */
	cnt = nsegs;

	/* headers */
	flits = m->m_len / 8;

	ndescs = flits_to_desc(flits + sgl_len(cnt));

	return (ndescs);
}

/**
 *	ofld_xmit - send a packet through an offload queue
 *	@adap: the adapter
 *	@qs: the queue set containing the Tx offload queue
 *	@m: the packet
 *
 *	Send an offload packet through an SGE offload queue.
 */
static int
ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
{
	int ret, nsegs;
	unsigned int ndesc;
	unsigned int pidx, gen;
	struct sge_txq *q = &qs->txq[TXQ_OFLD];
	bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
	struct tx_sw_desc *stx;

	nsegs = m_get_sgllen(m);
	vsegs = m_get_sgl(m);
	ndesc = calc_tx_descs_ofld(m, nsegs);
	busdma_map_sgl(vsegs, segs, nsegs);

	stx = &q->sdesc[q->pidx];

	TXQ_LOCK(qs);
again:	reclaim_completed_tx(qs, 16, TXQ_OFLD);
	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
	if (__predict_false(ret)) {
		if (ret == 1) {
			printf("no ofld desc avail\n");

			m_set_priority(m, ndesc);	/* save for restart */
			TXQ_UNLOCK(qs);
			return (EINTR);
		}
		goto again;
	}

	gen = q->gen;
	q->in_use += ndesc;
	pidx = q->pidx;
	q->pidx += ndesc;
	if (q->pidx >= q->size) {
		q->pidx -= q->size;
		q->gen ^= 1;
	}
#ifdef T3_TRACE
	T3_TRACE5(adap->tb[q->cntxt_id & 7],
	    "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
	    ndesc, pidx, m->m_pkthdr.len, m->m_len, nsegs);
#endif
	TXQ_UNLOCK(qs);

	write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
	check_ring_tx_db(adap, q);
	return (0);
}
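/*
 * Descriptor-count arithmetic, sketched with made-up numbers: a WR carrying
 * 40 bytes of headers contributes flits = 40 / 8 = 5 header flits; each
 * sg_ent packs two address/length pairs into 3 flits (hence the
 * TX_MAX_SEGS / 2 + 1 sizing of sgl[] above), so sgl_len(nsegs) adds the
 * SGL flits, and flits_to_desc() maps the total onto whole Tx descriptors.
 */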
/**
 *	restart_offloadq - restart a suspended offload queue
 *	@qs: the queue set containing the offload queue
 *
 *	Resumes transmission on a suspended Tx offload queue.
 */
static void
restart_offloadq(void *data, int npending)
{
	struct mbuf *m;
	struct sge_qset *qs = data;
	struct sge_txq *q = &qs->txq[TXQ_OFLD];
	adapter_t *adap = qs->port->adapter;
	bus_dma_segment_t segs[TX_MAX_SEGS];
	struct tx_sw_desc *stx = &q->sdesc[q->pidx];
	int nsegs, cleaned;

	TXQ_LOCK(qs);
again:	cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);

	while ((m = mbufq_peek(&q->sendq)) != NULL) {
		unsigned int gen, pidx;
		unsigned int ndesc = m_get_priority(m);

		if (__predict_false(q->size - q->in_use < ndesc)) {
			setbit(&qs->txq_stopped, TXQ_OFLD);
			if (should_restart_tx(q) &&
			    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
				goto again;
			q->stops++;
			break;
		}

		gen = q->gen;
		q->in_use += ndesc;
		pidx = q->pidx;
		q->pidx += ndesc;
		if (q->pidx >= q->size) {
			q->pidx -= q->size;
			q->gen ^= 1;
		}

		(void)mbufq_dequeue(&q->sendq);
		busdma_map_mbufs(&m, q, stx, segs, &nsegs);
		TXQ_UNLOCK(qs);
		write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
		TXQ_LOCK(qs);
	}
#if USE_GTS
	set_bit(TXQ_RUNNING, &q->flags);
	set_bit(TXQ_LAST_PKT_DB, &q->flags);
#endif
	TXQ_UNLOCK(qs);
	wmb();
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}

/**
 *	queue_set - return the queue set a packet should use
 *	@m: the packet
 *
 *	Maps a packet to the SGE queue set it should use.  The desired queue
 *	set is carried in bits 1-3 of the packet's priority.
 */
static __inline int
queue_set(const struct mbuf *m)
{
	return m_get_priority(m) >> 1;
}

/**
 *	is_ctrl_pkt - return whether an offload packet is a control packet
 *	@m: the packet
 *
 *	Determines whether an offload packet should use an OFLD or a CTRL
 *	Tx queue.  This is indicated by bit 0 of the packet's priority.
 */
static __inline int
is_ctrl_pkt(const struct mbuf *m)
{
	return m_get_priority(m) & 1;
}

/**
 *	t3_offload_tx - send an offload packet
 *	@tdev: the offload device to send to
 *	@m: the packet
 *
 *	Sends an offload packet.  We use the packet priority to select the
 *	appropriate Tx queue as follows: bit 0 indicates whether the packet
 *	should be sent as regular or control, bits 1-3 select the queue set.
 */
int
t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
{
	adapter_t *adap = tdev2adap(tdev);
	struct sge_qset *qs = &adap->sge.qs[queue_set(m)];

	if (__predict_false(is_ctrl_pkt(m)))
		return ctrl_xmit(adap, qs, m);

	return ofld_xmit(adap, qs, m);
}
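/*
 * Putting the priority encoding together: a hypothetical offload sender
 * that wants control-queue delivery on queue set qsidx would encode its
 * choice before handing the mbuf to t3_offload_tx():
 *
 *	m_set_priority(m, (qsidx << 1) | 1);	bit 0 = CTRL, bits 1-3 = qset
 *	t3_offload_tx(tdev, m);
 */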
/**
 *	deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
 *	@tdev: the offload device that will be receiving the packets
 *	@q: the SGE response queue that assembled the bundle
 *	@mbufs: the partial bundle
 *	@n: the number of packets in the bundle
 *
 *	Delivers a (partial) bundle of Rx offload packets to an offload device.
 */
static __inline void
deliver_partial_bundle(struct t3cdev *tdev,
    struct sge_rspq *q,
    struct mbuf *mbufs[], int n)
{
	if (n) {
		q->offload_bundles++;
		cxgb_ofld_recv(tdev, mbufs, n);
	}
}

static __inline int
rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
    struct mbuf *m, struct mbuf *rx_gather[],
    unsigned int gather_idx)
{

	rq->offload_pkts++;
	m->m_pkthdr.header = mtod(m, void *);
	rx_gather[gather_idx++] = m;
	if (gather_idx == RX_BUNDLE_SIZE) {
		cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
		gather_idx = 0;
		rq->offload_bundles++;
	}
	return (gather_idx);
}

static void
restart_tx(struct sge_qset *qs)
{
	struct adapter *sc = qs->port->adapter;

	if (isset(&qs->txq_stopped, TXQ_OFLD) &&
	    should_restart_tx(&qs->txq[TXQ_OFLD]) &&
	    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
		qs->txq[TXQ_OFLD].restarts++;
		DPRINTF("restarting TXQ_OFLD\n");
		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
	}
	DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
	    qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
	    qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
	    qs->txq[TXQ_CTRL].in_use);

	if (isset(&qs->txq_stopped, TXQ_CTRL) &&
	    should_restart_tx(&qs->txq[TXQ_CTRL]) &&
	    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
		qs->txq[TXQ_CTRL].restarts++;
		DPRINTF("restarting TXQ_CTRL\n");
		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
	}
}
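/*
 * Offload receive batching, in one place: rx_offload() accumulates up to
 * RX_BUNDLE_SIZE mbufs in the caller's rx_gather[] array before pushing a
 * full bundle to cxgb_ofld_recv(), and the response-queue loop calls
 * deliver_partial_bundle() on exit to flush the remainder, so a burst of
 * offload traffic costs one upcall per bundle rather than one per packet.
 */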
/**
 *	t3_sge_alloc_qset - initialize an SGE queue set
 *	@sc: the controller softc
 *	@id: the queue set id
 *	@nports: how many Ethernet ports will be using this queue set
 *	@irq_vec_idx: the IRQ vector index for response queue interrupts
 *	@p: configuration parameters for this queue set
 *	@ntxq: number of Tx queues for the queue set
 *	@pi: port info for queue set
 *
 *	Allocate resources and initialize an SGE queue set.  A queue set
 *	comprises a response queue, two Rx free-buffer queues, and up to 3
 *	Tx queues.  The Tx queues are assigned roles in the order Ethernet
 *	queue, offload queue, and control queue.
 */
int
t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
    const struct qset_params *p, int ntxq, struct port_info *pi)
{
	struct sge_qset *q = &sc->sge.qs[id];
	int i, ret = 0;

	MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
	q->port = pi;

	for (i = 0; i < SGE_TXQ_PER_SET; i++) {

		if ((q->txq[i].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
		    M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
			device_printf(sc->dev, "failed to allocate mbuf ring\n");
			goto err;
		}
		if ((q->txq[i].txq_ifq =
		    malloc(sizeof(struct ifaltq), M_DEVBUF, M_NOWAIT|M_ZERO))
		    == NULL) {
			device_printf(sc->dev, "failed to allocate ifq\n");
			goto err;
		}
		ifq_init(q->txq[i].txq_ifq, pi->ifp);
		callout_init(&q->txq[i].txq_timer, 1);
		callout_init(&q->txq[i].txq_watchdog, 1);
		q->txq[i].txq_timer.c_cpu = id % mp_ncpus;
		q->txq[i].txq_watchdog.c_cpu = id % mp_ncpus;
	}
	init_qset_cntxt(q, id);
	q->idx = id;
	if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
	    sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
	    &q->fl[0].desc, &q->fl[0].sdesc,
	    &q->fl[0].desc_tag, &q->fl[0].desc_map,
	    sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
		printf("error %d from alloc ring fl0\n", ret);
		goto err;
	}

	if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
	    sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
	    &q->fl[1].desc, &q->fl[1].sdesc,
	    &q->fl[1].desc_tag, &q->fl[1].desc_map,
	    sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
		printf("error %d from alloc ring fl1\n", ret);
		goto err;
	}

	if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
	    &q->rspq.phys_addr, &q->rspq.desc, NULL,
	    &q->rspq.desc_tag, &q->rspq.desc_map,
	    NULL, NULL)) != 0) {
		printf("error %d from alloc ring rspq\n", ret);
		goto err;
	}
	for (i = 0; i < ntxq; ++i) {
		size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);

		if ((ret = alloc_ring(sc, p->txq_size[i],
		    sizeof(struct tx_desc), sz,
		    &q->txq[i].phys_addr, &q->txq[i].desc,
		    &q->txq[i].sdesc, &q->txq[i].desc_tag,
		    &q->txq[i].desc_map,
		    sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
			printf("error %d from alloc ring tx %i\n", ret, i);
			goto err;
		}
		mbufq_init(&q->txq[i].sendq);
		q->txq[i].gen = 1;
		q->txq[i].size = p->txq_size[i];
	}

	TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
	TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
	TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
	TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);

	q->fl[0].gen = q->fl[1].gen = 1;
	q->fl[0].size = p->fl_size;
	q->fl[1].size = p->jumbo_size;

	q->rspq.gen = 1;
	q->rspq.cidx = 0;
	q->rspq.size = p->rspq_size;

	q->txq[TXQ_ETH].stop_thres = nports *
	    flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);

	q->fl[0].buf_size = MCLBYTES;
	q->fl[0].zone = zone_pack;
	q->fl[0].type = EXT_PACKET;
#if __FreeBSD_version > 800000
	if (cxgb_use_16k_clusters) {
		q->fl[1].buf_size = MJUM16BYTES;
		q->fl[1].zone = zone_jumbo16;
		q->fl[1].type = EXT_JUMBO16;
	} else {
		q->fl[1].buf_size = MJUM9BYTES;
		q->fl[1].zone = zone_jumbo9;
		q->fl[1].type = EXT_JUMBO9;
	}
#else
	q->fl[1].buf_size = MJUMPAGESIZE;
	q->fl[1].zone = zone_jumbop;
	q->fl[1].type = EXT_JUMBOP;
#endif

	/* Allocate and set up the lro_ctrl structure */
	q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
	ret = tcp_lro_init(&q->lro.ctrl);
	if (ret) {
		printf("error %d from tcp_lro_init\n", ret);
		goto err;
	}
	q->lro.ctrl.ifp = pi->ifp;

	mtx_lock_spin(&sc->sge.reg_lock);
	ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
	    q->rspq.phys_addr, q->rspq.size,
	    q->fl[0].buf_size, 1, 0);
	if (ret) {
		printf("error %d from t3_sge_init_rspcntxt\n", ret);
		goto err_unlock;
	}

	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
		ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
		    q->fl[i].phys_addr, q->fl[i].size,
		    q->fl[i].buf_size, p->cong_thres, 1,
		    0);
		if (ret) {
			printf("error %d from t3_sge_init_flcntxt for index i=%d\n",
			    ret, i);
			goto err_unlock;
		}
	}

	ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
	    SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
	    q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
	    1, 0);
	if (ret) {
		printf("error %d from t3_sge_init_ecntxt\n", ret);
		goto err_unlock;
	}

	if (ntxq > 1) {
		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
		    USE_GTS, SGE_CNTXT_OFLD, id,
		    q->txq[TXQ_OFLD].phys_addr,
		    q->txq[TXQ_OFLD].size, 0, 1, 0);
		if (ret) {
			printf("error %d from t3_sge_init_ecntxt\n", ret);
			goto err_unlock;
		}
	}

	if (ntxq > 2) {
		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
		    SGE_CNTXT_CTRL, id,
		    q->txq[TXQ_CTRL].phys_addr,
		    q->txq[TXQ_CTRL].size,
		    q->txq[TXQ_CTRL].token, 1, 0);
		if (ret) {
			printf("error %d from t3_sge_init_ecntxt\n", ret);
			goto err_unlock;
		}
	}

	snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
	    device_get_unit(sc->dev), irq_vec_idx);
	MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);

	mtx_unlock_spin(&sc->sge.reg_lock);
	t3_update_qset_coalesce(q, p);
	q->port = pi;

	refill_fl(sc, &q->fl[0], q->fl[0].size);
	refill_fl(sc, &q->fl[1], q->fl[1].size);
	refill_rspq(sc, &q->rspq, q->rspq.size - 1);

	t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
	    V_NEWTIMER(q->rspq.holdoff_tmr));

	return (0);

err_unlock:
	mtx_unlock_spin(&sc->sge.reg_lock);
err:
	TXQ_LOCK(q);
	t3_free_qset(sc, q);

	return (ret);
}
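/*
 * On the stop_thres computation above: the Ethernet queue is stopped while
 * fewer descriptors remain than the worst-case work request (an SGL
 * covering TX_MAX_SEGS + 1 segments plus 3 header flits) times the number
 * of ports sharing the queue set, so a transmit that has already been
 * admitted can always be written in full.
 */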
/*
 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
 * ethernet data.  Hardware assistance with various checksums and any vlan tag
 * will also be taken into account here.
 */
void
t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
{
	struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
	struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
	struct ifnet *ifp = pi->ifp;

	DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *),
	    cpl->iff);

	if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
	    cpl->csum_valid && cpl->csum == 0xffff) {
		rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|
		    CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
		m->m_pkthdr.csum_data = 0xffff;
	}

	if (cpl->vlan_valid) {
		m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
		m->m_flags |= M_VLANTAG;
	}

	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
	/*
	 * Adjust after conversion to mbuf chain.
	 */
	m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
	m->m_len -= (sizeof(*cpl) + ethpad);
	m->m_data += (sizeof(*cpl) + ethpad);
}
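/*
 * On the checksum flags set above: CSUM_DATA_VALID | CSUM_PSEUDO_HDR
 * together with csum_data = 0xffff is the stock FreeBSD way of telling the
 * stack that the hardware fully verified the L4 checksum, so the protocol
 * layers skip their own verification pass.
 */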
/**
 *	get_packet - return the next ingress packet buffer from a free list
 *	@adap: the adapter that received the packet
 *	@drop_thres: # of remaining buffers before we start dropping packets
 *	@qs: the qset that the SGE free list holding the packet belongs to
 *	@mh: the mbuf header, contains a pointer to the head and tail of the
 *	     mbuf chain
 *	@r: response descriptor
 *
 *	Get the next packet from a free list and complete setup of the
 *	mbuf.  If the packet is small we make a copy and recycle the
 *	original buffer, otherwise we use the original buffer itself.  If a
 *	positive drop threshold is supplied packets are dropped and their
 *	buffers recycled if (a) the number of remaining buffers is under the
 *	threshold and the packet is too big to copy, or (b) the packet should
 *	be copied but there is no memory for the copy.
 */
static int
get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
    struct t3_mbuf_hdr *mh, struct rsp_desc *r)
{

	unsigned int len_cq = ntohl(r->len_cq);
	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
	int mask, cidx = fl->cidx;
	struct rx_sw_desc *sd = &fl->sdesc[cidx];
	uint32_t len = G_RSPD_LEN(len_cq);
	uint32_t flags = M_EXT;
	uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
	caddr_t cl;
	struct mbuf *m;
	int ret = 0;

	mask = fl->size - 1;
	prefetch(fl->sdesc[(cidx + 1) & mask].m);
	prefetch(fl->sdesc[(cidx + 2) & mask].m);
	prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
	prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl);

	fl->credits--;
	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);

	if (recycle_enable && len <= SGE_RX_COPY_THRES &&
	    sopeop == RSPQ_SOP_EOP) {
		if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
			goto skip_recycle;
		cl = mtod(m, void *);
		memcpy(cl, sd->rxsd_cl, len);
		recycle_rx_buf(adap, fl, fl->cidx);
		m->m_pkthdr.len = m->m_len = len;
		m->m_flags = 0;
		mh->mh_head = mh->mh_tail = m;
		ret = 1;
		goto done;
	} else {
	skip_recycle:
		bus_dmamap_unload(fl->entry_tag, sd->map);
		cl = sd->rxsd_cl;
		m = sd->m;

		if ((sopeop == RSPQ_SOP_EOP) ||
		    (sopeop == RSPQ_SOP))
			flags |= M_PKTHDR;
		m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags);
		if (fl->zone == zone_pack) {
			/*
			 * restore clobbered data pointer
			 */
			m->m_data = m->m_ext.ext_buf;
		} else {
			m_cljset(m, cl, fl->type);
		}
		m->m_len = len;
	}
	switch (sopeop) {
	case RSPQ_SOP_EOP:
		ret = 1;
		/* FALLTHROUGH */
	case RSPQ_SOP:
		mh->mh_head = mh->mh_tail = m;
		m->m_pkthdr.len = len;
		break;
	case RSPQ_EOP:
		ret = 1;
		/* FALLTHROUGH */
	case RSPQ_NSOP_NEOP:
		if (mh->mh_tail == NULL) {
			log(LOG_ERR, "discarding intermediate descriptor entry\n");
			m_freem(m);
			break;
		}
		mh->mh_tail->m_next = m;
		mh->mh_tail = m;
		mh->mh_head->m_pkthdr.len += len;
		break;
	}
	if (cxgb_debug)
		printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
done:
	if (++fl->cidx == fl->size)
		fl->cidx = 0;

	return (ret);
}
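/*
 * The sopeop switch above is a small reassembly state machine for packets
 * spanning multiple free-list buffers:
 *
 *	RSPQ_SOP_EOP	whole packet in one buffer, chain complete
 *	RSPQ_SOP	first buffer, start a new chain in *mh
 *	RSPQ_NSOP_NEOP	middle buffer, append to mh->mh_tail
 *	RSPQ_EOP	last buffer, append and report completion
 *
 * get_packet() returns nonzero exactly when *mh holds a complete chain.
 */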
/**
 *	handle_rsp_cntrl_info - handles control information in a response
 *	@qs: the queue set corresponding to the response
 *	@flags: the response control flags
 *
 *	Handles the control information of an SGE response, such as GTS
 *	indications and completion credits for the queue set's Tx queues.
 *	HW coalesces credits, we don't do any extra SW coalescing.
 */
static __inline void
handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
{
	unsigned int credits;

#if USE_GTS
	if (flags & F_RSPD_TXQ0_GTS)
		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
#endif
	credits = G_RSPD_TXQ0_CR(flags);
	if (credits)
		qs->txq[TXQ_ETH].processed += credits;

	credits = G_RSPD_TXQ2_CR(flags);
	if (credits)
		qs->txq[TXQ_CTRL].processed += credits;

#if USE_GTS
	if (flags & F_RSPD_TXQ1_GTS)
		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
#endif
	credits = G_RSPD_TXQ1_CR(flags);
	if (credits)
		qs->txq[TXQ_OFLD].processed += credits;
}

static void
check_ring_db(adapter_t *adap, struct sge_qset *qs,
    unsigned int sleeping)
{
	;	/* no-op */
}
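/*
 * The credit flow that ties the Tx pieces together: a response may carry
 * completion credits for all three Tx queues (RSPD_CTRL_MASK above);
 * handle_rsp_cntrl_info() banks them in q->processed, the reclaim paths
 * turn them into free descriptors, and restart_tx() consults
 * should_restart_tx() to decide when a stopped queue has enough headroom
 * to resume.
 */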
/**
 *	process_responses - process responses from an SGE response queue
 *	@adap: the adapter
 *	@qs: the queue set to which the response queue belongs
 *	@budget: how many responses can be processed in this round
 *
 *	Process responses from an SGE response queue up to the supplied budget.
 *	Responses include received packets as well as credits and other events
 *	for the queues that belong to the response queue's queue set.
 *	A negative budget is effectively unlimited.
 *
 *	Additionally choose the interrupt holdoff time for the next interrupt
 *	on this queue.  If the system is under memory shortage use a fairly
 *	long delay to help recovery.
 */
static int
process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
{
	struct sge_rspq *rspq = &qs->rspq;
	struct rsp_desc *r = &rspq->desc[rspq->cidx];
	int budget_left = budget;
	unsigned int sleeping = 0;
	int lro_enabled = qs->lro.enabled;
	int skip_lro;
	struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
	struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
	int ngathered = 0;
#ifdef DEBUG
	static int last_holdoff = 0;
	if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
		printf("next_holdoff=%d\n", rspq->holdoff_tmr);
		last_holdoff = rspq->holdoff_tmr;
	}
#endif
	rspq->next_holdoff = rspq->holdoff_tmr;

	while (__predict_true(budget_left && is_new_response(r, rspq))) {
		int eth, eop = 0, ethpad = 0;
		uint32_t flags = ntohl(r->flags);
		uint32_t rss_csum = *(const uint32_t *)r;
		uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);

		eth = (r->rss_hdr.opcode == CPL_RX_PKT);

		if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
			struct mbuf *m;

			if (cxgb_debug)
				printf("async notification\n");

			if (rspq->rspq_mh.mh_head == NULL) {
				rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
				m = rspq->rspq_mh.mh_head;
			} else {
				m = m_gethdr(M_DONTWAIT, MT_DATA);
			}
			if (m == NULL)
				goto no_mem;

			memcpy(mtod(m, char *), r, AN_PKT_SIZE);
			m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
			*mtod(m, char *) = CPL_ASYNC_NOTIF;
			rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
			eop = 1;
			rspq->async_notif++;
			goto skip;
		} else if (flags & F_RSPD_IMM_DATA_VALID) {
			struct mbuf *m = NULL;

			DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
			    r->rss_hdr.opcode, rspq->cidx);
			if (rspq->rspq_mh.mh_head == NULL)
				rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
			else
				m = m_gethdr(M_DONTWAIT, MT_DATA);

			if (rspq->rspq_mh.mh_head == NULL && m == NULL) {
		no_mem:
				rspq->next_holdoff = NOMEM_INTR_DELAY;
				budget_left--;
				break;
			}
			get_imm_packet(adap, r, rspq->rspq_mh.mh_head);
			eop = 1;
			rspq->imm_data++;
		} else if (r->len_cq) {
			int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;

			eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r);
			if (eop) {
				rspq->rspq_mh.mh_head->m_flags |= M_FLOWID;
				rspq->rspq_mh.mh_head->m_pkthdr.flowid = rss_hash;
			}

			ethpad = 2;
		} else {
			rspq->pure_rsps++;
		}
	skip:
		if (flags & RSPD_CTRL_MASK) {
			sleeping |= flags & RSPD_GTS_MASK;
			handle_rsp_cntrl_info(qs, flags);
		}

		r++;
		if (__predict_false(++rspq->cidx == rspq->size)) {
			rspq->cidx = 0;
			rspq->gen ^= 1;
			r = rspq->desc;
		}

		if (++rspq->credits >= (rspq->size / 4)) {
			refill_rspq(adap, rspq, rspq->credits);
			rspq->credits = 0;
		}
		if (!eth && eop) {
			rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
			/*
			 * XXX size mismatch
			 */
			m_set_priority(rspq->rspq_mh.mh_head, rss_hash);

			ngathered = rx_offload(&adap->tdev, rspq,
			    rspq->rspq_mh.mh_head, offload_mbufs, ngathered);
			rspq->rspq_mh.mh_head = NULL;
			DPRINTF("received offload packet\n");
		} else if (eth && eop) {
			struct mbuf *m = rspq->rspq_mh.mh_head;

			t3_rx_eth(adap, rspq, m, ethpad);

			/*
			 * The T304 sends incoming packets on any qset.  If LRO
			 * is also enabled, we could end up sending the packet
			 * up lro_ctrl->ifp's input.  That is incorrect.
			 *
			 * The mbuf's rcvif was derived from the cpl header and
			 * is accurate.  Skip LRO and just use that.
			 */
			skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);

			if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro &&
			    (tcp_lro_rx(lro_ctrl, m, 0) == 0)) {
				/* successfully queued for LRO */
			} else {
				/*
				 * LRO not enabled, packet unsuitable for LRO,
				 * or unable to queue.  Pass it up right now in
				 * either case.
				 */
				struct ifnet *ifp = m->m_pkthdr.rcvif;
				(*ifp->if_input)(ifp, m);
			}
			rspq->rspq_mh.mh_head = NULL;
		}
		__refill_fl_lt(adap, &qs->fl[0], 32);
		__refill_fl_lt(adap, &qs->fl[1], 32);
		--budget_left;
	}

	deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);

	/* Flush LRO */
	while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
		struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
		SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next);
		tcp_lro_flush(lro_ctrl, queued);
	}

	if (sleeping)
		check_ring_db(adap, qs, sleeping);

	mb();	/* commit Tx queue processed updates */
	if (__predict_false(qs->txq_stopped > 1))
		restart_tx(qs);

	__refill_fl_lt(adap, &qs->fl[0], 512);
	__refill_fl_lt(adap, &qs->fl[1], 512);
	budget -= budget_left;
	return (budget);
}
/*
 * A helper function that processes responses and issues GTS.
 */
static __inline int
process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
{
	int work;
	static int last_holdoff = 0;

	work = process_responses(adap, rspq_to_qset(rq), -1);

	if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
		printf("next_holdoff=%d\n", rq->next_holdoff);
		last_holdoff = rq->next_holdoff;
	}
	t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
	    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));

	return (work);
}


/*
 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
 * Handles data events from SGE response queues as well as error and other
 * async events as they all use the same interrupt pin.  We use one SGE
 * response queue per port in this mode and protect all response queues with
 * queue 0's lock.
 */
void
t3b_intr(void *data)
{
	uint32_t i, map;
	adapter_t *adap = data;
	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;

	t3_write_reg(adap, A_PL_CLI, 0);
	map = t3_read_reg(adap, A_SG_DATA_INTR);

	if (!map)
		return;

	if (__predict_false(map & F_ERRINTR))
		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);

	mtx_lock(&q0->lock);
	for_each_port(adap, i)
		if (map & (1 << i))
			process_responses_gts(adap, &adap->sge.qs[i].rspq);
	mtx_unlock(&q0->lock);
}

/*
 * The MSI interrupt handler.  This needs to handle data events from SGE
 * response queues as well as error and other async events as they all use
 * the same MSI vector.  We use one SGE response queue per port in this mode
 * and protect all response queues with queue 0's lock.
 */
void
t3_intr_msi(void *data)
{
	adapter_t *adap = data;
	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
	int i, new_packets = 0;

	mtx_lock(&q0->lock);

	for_each_port(adap, i)
		if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
			new_packets = 1;
	mtx_unlock(&q0->lock);
	if (new_packets == 0)
		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
}

void
t3_intr_msix(void *data)
{
	struct sge_qset *qs = data;
	adapter_t *adap = qs->port->adapter;
	struct sge_rspq *rspq = &qs->rspq;

	if (process_responses_gts(adap, rspq) == 0)
		rspq->unhandled_irqs++;
}
#define QDUMP_SBUF_SIZE (32 * 400)
static int
t3_dump_rspq(SYSCTL_HANDLER_ARGS)
{
	struct sge_rspq *rspq;
	struct sge_qset *qs;
	int i, err, dump_end, idx;
	static int multiplier = 1;
	struct sbuf *sb;
	struct rsp_desc *rspd;
	uint32_t data[4];

	rspq = arg1;
	qs = rspq_to_qset(rspq);
	if (rspq->rspq_dump_count == 0)
		return (0);
	if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
		log(LOG_WARNING,
		    "dump count is too large %d\n", rspq->rspq_dump_count);
		rspq->rspq_dump_count = 0;
		return (EINVAL);
	}
	if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
		log(LOG_WARNING,
		    "dump start of %d is greater than queue size\n",
		    rspq->rspq_dump_start);
		rspq->rspq_dump_start = 0;
		return (EINVAL);
	}
	err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
	if (err)
		return (err);
retry_sbufops:
	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);

	sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
	    (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
	    ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
	sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
	    ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);

	sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
	    (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));

	dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
	for (i = rspq->rspq_dump_start; i < dump_end; i++) {
		idx = i & (RSPQ_Q_SIZE-1);

		rspd = &rspq->desc[idx];
		sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
		    idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
		    rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
		sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
		    rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
		    be32toh(rspd->len_cq), rspd->intr_gen);
	}
	if (sbuf_overflowed(sb)) {
		sbuf_delete(sb);
		multiplier++;
		goto retry_sbufops;
	}
	sbuf_finish(sb);
	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
	sbuf_delete(sb);
	return (err);
}

static int
t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
{
	struct sge_txq *txq;
	struct sge_qset *qs;
	int i, j, err, dump_end;
	static int multiplier = 1;
	struct sbuf *sb;
	struct tx_desc *txd;
	uint32_t *WR, wr_hi, wr_lo, gen;
	uint32_t data[4];

	txq = arg1;
	qs = txq_to_qset(txq, TXQ_ETH);
	if (txq->txq_dump_count == 0) {
		return (0);
	}
	if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
		log(LOG_WARNING,
		    "dump count is too large %d\n", txq->txq_dump_count);
		txq->txq_dump_count = 1;
		return (EINVAL);
	}
	if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
		log(LOG_WARNING,
		    "dump start of %d is greater than queue size\n",
		    txq->txq_dump_start);
		txq->txq_dump_start = 0;
		return (EINVAL);
	}
	err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
	if (err)
		return (err);

retry_sbufops:
	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);

	sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
	    (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
	    (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
	sbuf_printf(sb, " TUN=%u TOE=%u generation=%u uP token=%u valid=%u\n",
	    ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
	    ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
	    txq->txq_dump_start,
	    (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));

	dump_end = txq->txq_dump_start + txq->txq_dump_count;
	for (i = txq->txq_dump_start; i < dump_end; i++) {
		txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
		WR = (uint32_t *)txd->flit;
		wr_hi = ntohl(WR[0]);
		wr_lo = ntohl(WR[1]);
		gen = G_WR_GEN(wr_lo);

		sbuf_printf(sb, " wr_hi %08x wr_lo %08x gen %d\n",
		    wr_hi, wr_lo, gen);
		for (j = 2; j < 30; j += 4)
			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
	}
	if (sbuf_overflowed(sb)) {
		sbuf_delete(sb);
		multiplier++;
		goto retry_sbufops;
	}
	sbuf_finish(sb);
	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
	sbuf_delete(sb);
	return (err);
}
static int
t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
{
	struct sge_txq *txq;
	struct sge_qset *qs;
	int i, j, err, dump_end;
	static int multiplier = 1;
	struct sbuf *sb;
	struct tx_desc *txd;
	uint32_t *WR, wr_hi, wr_lo, gen;

	txq = arg1;
	qs = txq_to_qset(txq, TXQ_CTRL);
	if (txq->txq_dump_count == 0) {
		return (0);
	}
	if (txq->txq_dump_count > 256) {
		log(LOG_WARNING,
		    "dump count is too large %d\n", txq->txq_dump_count);
		txq->txq_dump_count = 1;
		return (EINVAL);
	}
	if (txq->txq_dump_start > 255) {
		log(LOG_WARNING,
		    "dump start of %d is greater than queue size\n",
		    txq->txq_dump_start);
		txq->txq_dump_start = 0;
		return (EINVAL);
	}

retry_sbufops:
	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
	    txq->txq_dump_start,
	    (txq->txq_dump_start + txq->txq_dump_count) & 255);

	dump_end = txq->txq_dump_start + txq->txq_dump_count;
	for (i = txq->txq_dump_start; i < dump_end; i++) {
		txd = &txq->desc[i & (255)];
		WR = (uint32_t *)txd->flit;
		wr_hi = ntohl(WR[0]);
		wr_lo = ntohl(WR[1]);
		gen = G_WR_GEN(wr_lo);

		sbuf_printf(sb, " wr_hi %08x wr_lo %08x gen %d\n",
		    wr_hi, wr_lo, gen);
		for (j = 2; j < 30; j += 4)
			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
	}
	if (sbuf_overflowed(sb)) {
		sbuf_delete(sb);
		multiplier++;
		goto retry_sbufops;
	}
	sbuf_finish(sb);
	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
	sbuf_delete(sb);
	return (err);
}
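/*
 * These dump handlers are driven from userland; a debugging session might
 * look like the following (node names taken from the sysctl tree built in
 * t3_add_configured_sysctls() below; the dev.cxgbc.0 prefix assumes the
 * controller attached as unit 0):
 *
 *	# sysctl dev.cxgbc.0.port0.qs0.txq_eth.dump_start=0
 *	# sysctl dev.cxgbc.0.port0.qs0.txq_eth.dump_count=32
 *	# sysctl dev.cxgbc.0.port0.qs0.txq_eth.qdump
 */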
static int
t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
{
	adapter_t *sc = arg1;
	struct qset_params *qsp = &sc->params.sge.qset[0];
	int coalesce_usecs;
	struct sge_qset *qs;
	int i, j, err, nqsets = 0;
	struct mtx *lock;

	if ((sc->flags & FULL_INIT_DONE) == 0)
		return (ENXIO);

	coalesce_usecs = qsp->coalesce_usecs;
	err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);

	if (err != 0) {
		return (err);
	}
	if (coalesce_usecs == qsp->coalesce_usecs)
		return (0);

	for (i = 0; i < sc->params.nports; i++)
		for (j = 0; j < sc->port[i].nqsets; j++)
			nqsets++;

	coalesce_usecs = max(1, coalesce_usecs);

	for (i = 0; i < nqsets; i++) {
		qs = &sc->sge.qs[i];
		qsp = &sc->params.sge.qset[i];
		qsp->coalesce_usecs = coalesce_usecs;

		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
		    &sc->sge.qs[0].rspq.lock;

		mtx_lock(lock);
		t3_update_qset_coalesce(qs, qsp);
		t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
		    V_NEWTIMER(qs->rspq.holdoff_tmr));
		mtx_unlock(lock);
	}

	return (0);
}


void
t3_add_attach_sysctls(adapter_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
	    "firmware_version",
	    CTLFLAG_RD, &sc->fw_version,
	    0, "firmware version");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "hw_revision",
	    CTLFLAG_RD, &sc->params.rev,
	    0, "chip model");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
	    "port_types",
	    CTLFLAG_RD, &sc->port_types,
	    0, "type of ports");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "enable_debug",
	    CTLFLAG_RW, &cxgb_debug,
	    0, "enable verbose debugging output");
	SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tunq_coalesce",
	    CTLFLAG_RD, &sc->tunq_coalesce,
	    "#tunneled packets freed");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "txq_overrun",
	    CTLFLAG_RD, &txq_fills,
	    0, "#times txq overrun");
}


static const char *rspq_name = "rspq";
static const char *txq_names[] =
{
	"txq_eth",
	"txq_ofld",
	"txq_ctrl"
};

static int
sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
{
	struct port_info *p = arg1;
	uint64_t *parg;

	if (!p)
		return (EINVAL);

	parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
	PORT_LOCK(p);
	t3_mac_update_stats(&p->mac);
	PORT_UNLOCK(p);

	return (sysctl_handle_quad(oidp, parg, 0, req));
}
void
t3_add_configured_sysctls(adapter_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	int i, j;

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "intr_coal",
	    CTLTYPE_INT|CTLFLAG_RW, sc,
	    0, t3_set_coalesce_usecs,
	    "I", "interrupt coalescing timer (us)");

	for (i = 0; i < sc->params.nports; i++) {
		struct port_info *pi = &sc->port[i];
		struct sysctl_oid *poid;
		struct sysctl_oid_list *poidlist;
		struct mac_stats *mstats = &pi->mac.stats;

		snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
		poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
		    pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
		poidlist = SYSCTL_CHILDREN(poid);
		SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO,
		    "nqsets", CTLFLAG_RD, &pi->nqsets,
		    0, "#queue sets");

		for (j = 0; j < pi->nqsets; j++) {
			struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
			struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
			    *ctrlqpoid, *lropoid;
			struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
			    *txqpoidlist, *ctrlqpoidlist,
			    *lropoidlist;
			struct sge_txq *txq = &qs->txq[TXQ_ETH];

			snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);

			qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
			    qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
			qspoidlist = SYSCTL_CHILDREN(qspoid);

			SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
			    CTLFLAG_RD, &qs->fl[0].empty, 0,
			    "freelist #0 empty");
			SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
			    CTLFLAG_RD, &qs->fl[1].empty, 0,
			    "freelist #1 empty");

			rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
			    rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
			rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);

			txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
			    txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
			txqpoidlist = SYSCTL_CHILDREN(txqpoid);

			ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
			    txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
			ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);

			lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
			    "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
			lropoidlist = SYSCTL_CHILDREN(lropoid);

			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
			    CTLFLAG_RD, &qs->rspq.size,
			    0, "#entries in response queue");
			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
			    CTLFLAG_RD, &qs->rspq.cidx,
			    0, "consumer index");
			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
			    CTLFLAG_RD, &qs->rspq.credits,
			    0, "#credits");
			SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
			    CTLFLAG_RD, &qs->rspq.phys_addr,
			    "physical address of the queue");
			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
			    CTLFLAG_RW, &qs->rspq.rspq_dump_start,
			    0, "start rspq dump entry");
			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
			    CTLFLAG_RW, &qs->rspq.rspq_dump_count,
			    0, "#rspq entries to dump");
			SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
			    CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
			    0, t3_dump_rspq, "A", "dump of the response queue");

			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped",
			    CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops,
			    0, "#tunneled packets dropped");
			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
			    CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
			    0, "#tunneled packets waiting to be sent");
#if 0
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
			    0, "#tunneled packets queue producer index");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
			    0, "#tunneled packets queue consumer index");
#endif
			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
			    CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
			    0, "#tunneled packets processed by the card");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
			    CTLFLAG_RD, &txq->cleaned,
			    0, "#tunneled packets cleaned");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
			    CTLFLAG_RD, &txq->in_use,
			    0, "#tunneled packet slots in use");
			SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
			    CTLFLAG_RD, &txq->txq_frees,
			    "#tunneled packets freed");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
			    CTLFLAG_RD, &txq->txq_skipped,
			    0, "#tunneled packet descriptors skipped");
			SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
			    CTLFLAG_RD, &txq->txq_coalesced,
			    "#tunneled packets coalesced");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
			    CTLFLAG_RD, &txq->txq_enqueued,
			    0, "#tunneled packets enqueued to hardware");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
			    CTLFLAG_RD, &qs->txq_stopped,
			    0, "tx queues stopped");
stopped"); 3630 SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr", 3631 CTLFLAG_RD, &txq->phys_addr, 3632 "physical_address_of the queue"); 3633 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen", 3634 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen, 3635 0, "txq generation"); 3636 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx", 3637 CTLFLAG_RD, &txq->cidx, 3638 0, "hardware queue cidx"); 3639 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx", 3640 CTLFLAG_RD, &txq->pidx, 3641 0, "hardware queue pidx"); 3642 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start", 3643 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start, 3644 0, "txq start idx for dump"); 3645 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count", 3646 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count, 3647 0, "txq #entries to dump"); 3648 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump", 3649 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH], 3650 0, t3_dump_txq_eth, "A", "dump of the transmit queue"); 3651 3652 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start", 3653 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start, 3654 0, "ctrlq start idx for dump"); 3655 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count", 3656 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count, 3657 0, "ctrl #entries to dump"); 3658 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump", 3659 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL], 3660 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue"); 3661 3662 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued", 3663 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL); 3664 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed", 3665 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL); 3666 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum", 3667 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL); 3668 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt", 3669 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL); 3670 } 3671 3672 /* Now add a node for mac stats. */ 3673 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats", 3674 CTLFLAG_RD, NULL, "MAC statistics"); 3675 poidlist = SYSCTL_CHILDREN(poid); 3676 3677 /* 3678 * We (ab)use the length argument (arg2) to pass on the offset 3679 * of the data that we are interested in. This is only required 3680 * for the quad counters that are updated from the hardware (we 3681 * make sure that we return the latest value). 3682 * sysctl_handle_macstat first updates *all* the counters from 3683 * the hardware, and then returns the latest value of the 3684 * requested counter. Best would be to update only the 3685 * requested counter from hardware, but t3_mac_update_stats() 3686 * hides all the register details and we don't want to dive into 3687 * all that here. 
#define CXGB_SYSCTL_ADD_QUAD(a)	SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
    (CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
    sysctl_handle_macstat, "QU", 0)
		CXGB_SYSCTL_ADD_QUAD(tx_octets);
		CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
		CXGB_SYSCTL_ADD_QUAD(tx_frames);
		CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
		CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
		CXGB_SYSCTL_ADD_QUAD(tx_pause);
		CXGB_SYSCTL_ADD_QUAD(tx_deferred);
		CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
		CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
		CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
		CXGB_SYSCTL_ADD_QUAD(tx_underrun);
		CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
		CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
		CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
		CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
		CXGB_SYSCTL_ADD_QUAD(rx_octets);
		CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
		CXGB_SYSCTL_ADD_QUAD(rx_frames);
		CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
		CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
		CXGB_SYSCTL_ADD_QUAD(rx_pause);
		CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_runt);
		CXGB_SYSCTL_ADD_QUAD(rx_jabber);
		CXGB_SYSCTL_ADD_QUAD(rx_short);
		CXGB_SYSCTL_ADD_QUAD(rx_too_long);
		CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
#undef CXGB_SYSCTL_ADD_QUAD

#define CXGB_SYSCTL_ADD_ULONG(a)	SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
    CTLFLAG_RD, &mstats->a, 0)
		CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
		CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
		CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
		CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
		CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
		CXGB_SYSCTL_ADD_ULONG(num_toggled);
		CXGB_SYSCTL_ADD_ULONG(num_resets);
		CXGB_SYSCTL_ADD_ULONG(link_faults);
#undef CXGB_SYSCTL_ADD_ULONG
	}
}
/**
 *	t3_get_desc - dump an SGE descriptor for debugging purposes
 *	@qs: the queue set
 *	@qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
 *	@idx: the descriptor index in the queue
 *	@data: where to dump the descriptor contents
 *
 *	Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 *	size of the descriptor.
 */
int
t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
    unsigned char *data)
{
	if (qnum >= 6)
		return (EINVAL);

	if (qnum < 3) {
		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
			return -EINVAL;
		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
		return sizeof(struct tx_desc);
	}

	if (qnum == 3) {
		if (!qs->rspq.desc || idx >= qs->rspq.size)
			return (EINVAL);
		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
		return sizeof(struct rsp_desc);
	}

	qnum -= 4;
	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
		return (EINVAL);
	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
	return sizeof(struct rx_desc);
}
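/*
 * Sketch of a hypothetical caller of t3_get_desc(), sized per the
 * return-value contract (qnum 0 is the first Tx queue, TXQ_ETH):
 *
 *	unsigned char buf[sizeof(struct tx_desc)];
 *	int len = t3_get_desc(&sc->sge.qs[0], 0, 0, buf);
 *	if (len == sizeof(struct tx_desc))
 *		buf now holds a copy of Tx descriptor 0
 */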