/**************************************************************************
SPDX-License-Identifier: BSD-2-Clause-FreeBSD

Copyright (c) 2007-2009, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet6.h"
#include "opt_inet.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/rman.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/syslog.h>
#include <sys/socket.h>
#include <sys/sglist.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <cxgb_include.h>
#include <sys/mvec.h>

int	txq_fills = 0;
int	multiq_tx_enable = 1;

#ifdef TCP_OFFLOAD
CTASSERT(NUM_CPL_HANDLERS >= NUM_CPL_CMDS);
#endif

extern struct sysctl_oid_list sysctl__hw_cxgb_children;
int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
    "size of per-queue mbuf ring");

static int cxgb_tx_coalesce_force = 0;
SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RWTUN,
    &cxgb_tx_coalesce_force, 0,
    "coalesce small packets into a single work request regardless of ring state");

#define	COALESCE_START_DEFAULT	(TX_ETH_Q_SIZE>>1)
#define	COALESCE_START_MAX	(TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3))
#define	COALESCE_STOP_DEFAULT	(TX_ETH_Q_SIZE>>2)
#define	COALESCE_STOP_MIN	(TX_ETH_Q_SIZE>>5)
#define	TX_RECLAIM_DEFAULT	(TX_ETH_Q_SIZE>>5)
#define	TX_RECLAIM_MAX		(TX_ETH_Q_SIZE>>2)
#define	TX_RECLAIM_MIN		(TX_ETH_Q_SIZE>>6)
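/*
 * Worked example of the thresholds above (editorial, illustrative only;
 * assumes the usual TX_ETH_Q_SIZE of 1024 descriptors): coalescing starts
 * once the hardware queue holds 512 entries (1/2), stops below 256 (1/4),
 * and the start threshold may be raised no higher than 896 (7/8).  Tx
 * reclaim defaults to batches of 32, clamped to the range [16, 256].
 */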
static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RWTUN,
    &cxgb_tx_coalesce_enable_start, 0,
    "coalesce enable threshold");
static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RWTUN,
    &cxgb_tx_coalesce_enable_stop, 0,
    "coalesce disable threshold");
static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RWTUN,
    &cxgb_tx_reclaim_threshold, 0,
    "tx cleaning minimum threshold");

/*
 * XXX don't re-enable this until TOE stops assuming
 * we have an m_ext
 */
static int recycle_enable = 0;

extern int cxgb_use_16k_clusters;
extern int nmbjumbop;
extern int nmbjumbo9;
extern int nmbjumbo16;

#define USE_GTS 0

#define SGE_RX_SM_BUF_SIZE	1536
#define SGE_RX_DROP_THRES	16
#define SGE_RX_COPY_THRES	128

/*
 * Period of the Tx buffer reclaim timer.  This timer does not need to run
 * frequently as Tx buffers are usually reclaimed by new Tx packets.
 */
#define TX_RECLAIM_PERIOD	(hz >> 1)

/*
 * Values for sge_txq.flags
 */
enum {
	TXQ_RUNNING	= 1 << 0,	/* fetch engine is running */
	TXQ_LAST_PKT_DB	= 1 << 1,	/* last packet rang the doorbell */
};

struct tx_desc {
	uint64_t	flit[TX_DESC_FLITS];
} __packed;

struct rx_desc {
	uint32_t	addr_lo;
	uint32_t	len_gen;
	uint32_t	gen2;
	uint32_t	addr_hi;
} __packed;

struct rsp_desc {	/* response queue descriptor */
	struct rss_header	rss_hdr;
	uint32_t		flags;
	uint32_t		len_cq;
	uint8_t			imm_data[47];
	uint8_t			intr_gen;
} __packed;

#define RX_SW_DESC_MAP_CREATED	(1 << 0)
#define TX_SW_DESC_MAP_CREATED	(1 << 1)
#define RX_SW_DESC_INUSE	(1 << 3)
#define TX_SW_DESC_MAPPED	(1 << 4)

#define RSPQ_NSOP_NEOP	G_RSPD_SOP_EOP(0)
#define RSPQ_EOP	G_RSPD_SOP_EOP(F_RSPD_EOP)
#define RSPQ_SOP	G_RSPD_SOP_EOP(F_RSPD_SOP)
#define RSPQ_SOP_EOP	G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)

struct tx_sw_desc {	/* SW state per Tx descriptor */
	struct mbuf	*m;
	bus_dmamap_t	map;
	int		flags;
};

struct rx_sw_desc {	/* SW state per Rx descriptor */
	caddr_t		rxsd_cl;
	struct mbuf	*m;
	bus_dmamap_t	map;
	int		flags;
};

struct txq_state {
	unsigned int	compl;
	unsigned int	gen;
	unsigned int	pidx;
};

struct refill_fl_cb_arg {
	int			error;
	bus_dma_segment_t	seg;
	int			nseg;
};


/*
 * Maps a number of flits to the number of Tx descriptors that can hold them.
 * The formula is
 *
 *	desc = 1 + (flits - 2) / (WR_FLITS - 1).
 *
 * HW allows up to 4 descriptors to be combined into a WR.
 */
static uint8_t flit_desc_map[] = {
	0,
#if SGE_NUM_GENBITS == 1
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};
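/*
 * Worked example of the mapping above (editorial): with SGE_NUM_GENBITS == 2
 * a descriptor holds WR_FLITS == 15 flits (the last flit carries the second
 * generation bit), so flits = 15 maps to 1 + (15 - 2) / 14 = 1 descriptor,
 * while flits = 16 maps to 1 + (16 - 2) / 14 = 2, matching the table rows.
 */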
#define	TXQ_LOCK_ASSERT(qs)	mtx_assert(&(qs)->lock, MA_OWNED)
#define	TXQ_TRYLOCK(qs)		mtx_trylock(&(qs)->lock)
#define	TXQ_LOCK(qs)		mtx_lock(&(qs)->lock)
#define	TXQ_UNLOCK(qs)		mtx_unlock(&(qs)->lock)
#define	TXQ_RING_EMPTY(qs)	drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_NEEDS_ENQUEUE(qs)	\
	drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_FLUSH(qs)	drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_DEQUEUE_COND(qs, func, arg)	\
	drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
#define	TXQ_RING_DEQUEUE(qs)	\
	drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)

int cxgb_debug = 0;

static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static void sge_txq_reclaim_handler(void *arg, int ncount);
static void cxgb_start_locked(struct sge_qset *qs);

/*
 * XXX need to cope with bursty scheduling by looking at a wider
 * window than we are now for determining the need for coalescing
 */
static __inline uint64_t
check_pkt_coalesce(struct sge_qset *qs)
{
	struct adapter *sc;
	struct sge_txq *txq;
	uint8_t *fill;

	if (__predict_false(cxgb_tx_coalesce_force))
		return (1);
	txq = &qs->txq[TXQ_ETH];
	sc = qs->port->adapter;
	fill = &sc->tunq_fill[qs->idx];

	if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
		cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
	if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
		cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
	/*
	 * If the hardware transmit queue fills past the enable threshold
	 * (half full by default) we mark it as coalescing - we drop back
	 * from coalescing when it drains below the disable threshold (a
	 * quarter full by default) and there are no packets enqueued;
	 * this provides us with some degree of hysteresis.
	 */
	if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
	    TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
		*fill = 0;
	else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
		*fill = 1;

	return (sc->tunq_coalesce);
}

#ifdef __LP64__
static void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{
	uint64_t wr_hilo;
#if _BYTE_ORDER == _LITTLE_ENDIAN
	wr_hilo = wr_hi;
	wr_hilo |= (((uint64_t)wr_lo)<<32);
#else
	wr_hilo = wr_lo;
	wr_hilo |= (((uint64_t)wr_hi)<<32);
#endif
	wrp->wrh_hilo = wr_hilo;
}
#else
static void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{

	wrp->wrh_hi = wr_hi;
	wmb();
	wrp->wrh_lo = wr_lo;
}
#endif
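/*
 * Ordering note on set_wr_hdr() (editorial): on 64-bit platforms both
 * halves of the work request header are published with a single aligned
 * 64-bit store, so no barrier is needed.  32-bit platforms must store the
 * halves separately, and the wmb() ensures the SGE never observes a valid
 * wrh_lo (which callers use to carry the generation bit) before wrh_hi is
 * in place.
 */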
struct coalesce_info {
	int count;
	int nbytes;
	int noncoal;
};

static int
coalesce_check(struct mbuf *m, void *arg)
{
	struct coalesce_info *ci = arg;

	if ((m->m_next != NULL) ||
	    ((mtod(m, vm_offset_t) & PAGE_MASK) + m->m_len > PAGE_SIZE))
		ci->noncoal = 1;

	if ((ci->count == 0) || (ci->noncoal == 0 && (ci->count < 7) &&
	    (ci->nbytes + m->m_len <= 10500))) {
		ci->count++;
		ci->nbytes += m->m_len;
		return (1);
	}
	return (0);
}

static struct mbuf *
cxgb_dequeue(struct sge_qset *qs)
{
	struct mbuf *m, *m_head, *m_tail;
	struct coalesce_info ci;


	if (check_pkt_coalesce(qs) == 0)
		return TXQ_RING_DEQUEUE(qs);

	m_head = m_tail = NULL;
	ci.count = ci.nbytes = ci.noncoal = 0;
	do {
		m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
		if (m_head == NULL) {
			m_tail = m_head = m;
		} else if (m != NULL) {
			m_tail->m_nextpkt = m;
			m_tail = m;
		}
	} while (m != NULL);
	if (ci.count > 7)
		panic("trying to coalesce %d packets in to one WR", ci.count);
	return (m_head);
}

/**
 *	reclaim_completed_tx - reclaims completed Tx descriptors
 *	@qs: the queue set containing the Tx queue
 *	@reclaim_min: do nothing unless at least this many descriptors can be reclaimed
 *	@queue: the index of the Tx queue to reclaim completed descriptors from
 *
 *	Reclaims Tx descriptors that the SGE has indicated it has processed,
 *	and frees the associated buffers if possible.  Called with the Tx
 *	queue's lock held.
 */
static __inline int
reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
{
	struct sge_txq *q = &qs->txq[queue];
	int reclaim = desc_reclaimable(q);

	if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
	    (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
		cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;

	if (reclaim < reclaim_min)
		return (0);

	mtx_assert(&qs->lock, MA_OWNED);
	if (reclaim > 0) {
		t3_free_tx_desc(qs, reclaim, queue);
		q->cleaned += reclaim;
		q->in_use -= reclaim;
	}
	if (isset(&qs->txq_stopped, TXQ_ETH))
		clrbit(&qs->txq_stopped, TXQ_ETH);

	return (reclaim);
}

#ifdef NETDUMP
int
cxgb_netdump_poll_tx(struct sge_qset *qs)
{

	return (reclaim_completed_tx(qs, TX_RECLAIM_MAX, TXQ_ETH));
}
#endif

/**
 *	should_restart_tx - are there enough resources to restart a Tx queue?
 *	@q: the Tx queue
 *
 *	Checks if there are enough descriptors to restart a suspended Tx queue.
 */
static __inline int
should_restart_tx(const struct sge_txq *q)
{
	unsigned int r = q->processed - q->cleaned;

	return q->in_use - r < (q->size >> 1);
}
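/*
 * Illustrative reading of the test above (editorial): with a 1024-entry
 * queue, a suspended queue is restarted only once fewer than 512
 * descriptors remain genuinely outstanding, i.e. in_use minus the
 * descriptors the hardware has already processed but software has not yet
 * reclaimed (q->processed - q->cleaned), so a restarted queue always has
 * at least half its descriptors available.
 */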
/**
 *	t3_sge_init - initialize SGE
 *	@adap: the adapter
 *	@p: the SGE parameters
 *
 *	Performs SGE initialization needed every time after a chip reset.
 *	We do not initialize any of the queue sets here, instead the driver
 *	top-level must request those individually.  We also do not enable DMA
 *	here, that should be done after the queues have been set up.
 */
void
t3_sge_init(adapter_t *adap, struct sge_params *p)
{
	u_int ctrl, ups;

	ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */

	ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
	       F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
	       V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
	       V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
#if SGE_NUM_GENBITS == 1
	ctrl |= F_EGRGENCTRL;
#endif
	if (adap->params.rev > 0) {
		if (!(adap->flags & (USING_MSIX | USING_MSI)))
			ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
	}
	t3_write_reg(adap, A_SG_CONTROL, ctrl);
	t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
		     V_LORCQDRBTHRSH(512));
	t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
	t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
		     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
	t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
		     adap->params.rev < T3_REV_C ? 1000 : 500);
	t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
	t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
	t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
	t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
	t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
}


/**
 *	sgl_len - calculates the size of an SGL of the given capacity
 *	@n: the number of SGL entries
 *
 *	Calculates the number of flits needed for a scatter/gather list that
 *	can hold the given number of entries.
 */
static __inline unsigned int
sgl_len(unsigned int n)
{
	return ((3 * n) / 2 + (n & 1));
}
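/*
 * Worked example (editorial): each SGL entry is a 4-byte length plus an
 * 8-byte address, so a pair of entries packs into three 8-byte flits.
 * n = 2 gives 3 flits, n = 3 gives 4 + 1 = 5, and n = 4 gives 6.
 */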
/**
 *	get_imm_packet - return the next ingress packet buffer from a response
 *	@sc: the adapter softc
 *	@resp: the response descriptor containing the packet data
 *	@m: the mbuf to copy the immediate data into
 *
 *	Return a packet containing the immediate data of the given response.
 */
static int
get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
{

	if (resp->rss_hdr.opcode == CPL_RX_DATA) {
		const struct cpl_rx_data *cpl = (const void *)&resp->imm_data[0];
		m->m_len = sizeof(*cpl) + ntohs(cpl->len);
	} else if (resp->rss_hdr.opcode == CPL_RX_PKT) {
		const struct cpl_rx_pkt *cpl = (const void *)&resp->imm_data[0];
		m->m_len = sizeof(*cpl) + ntohs(cpl->len);
	} else
		m->m_len = IMMED_PKT_SIZE;
	m->m_ext.ext_buf = NULL;
	m->m_ext.ext_type = 0;
	memcpy(mtod(m, uint8_t *), resp->imm_data, m->m_len);
	return (0);
}

static __inline u_int
flits_to_desc(u_int n)
{
	return (flit_desc_map[n]);
}

#define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
		    F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
		    V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
		    F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
		    F_HIRCQPARITYERROR)
#define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
#define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
		      F_RSPQDISABLED)

/**
 *	t3_sge_err_intr_handler - SGE async event interrupt handler
 *	@adapter: the adapter
 *
 *	Interrupt handler for SGE asynchronous (non-data) events.
 */
void
t3_sge_err_intr_handler(adapter_t *adapter)
{
	unsigned int v, status;

	status = t3_read_reg(adapter, A_SG_INT_CAUSE);
	if (status & SGE_PARERR)
		CH_ALERT(adapter, "SGE parity error (0x%x)\n",
			 status & SGE_PARERR);
	if (status & SGE_FRAMINGERR)
		CH_ALERT(adapter, "SGE framing error (0x%x)\n",
			 status & SGE_FRAMINGERR);
	if (status & F_RSPQCREDITOVERFOW)
		CH_ALERT(adapter, "SGE response queue credit overflow\n");

	if (status & F_RSPQDISABLED) {
		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);

		CH_ALERT(adapter,
			 "packet delivered to disabled response queue (0x%x)\n",
			 (v >> S_RSPQ0DISABLED) & 0xff);
	}

	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
	if (status & SGE_FATALERR)
		t3_fatal_err(adapter);
}

void
t3_sge_prep(adapter_t *adap, struct sge_params *p)
{
	int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size;

	nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus);
	nqsets *= adap->params.nports;

	fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);

	while (!powerof2(fl_q_size))
		fl_q_size--;

	use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters :
	    is_offload(adap);

#if __FreeBSD_version >= 700111
	if (use_16k) {
		jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
		jumbo_buf_size = MJUM16BYTES;
	} else {
		jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
		jumbo_buf_size = MJUM9BYTES;
	}
#else
	jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE);
	jumbo_buf_size = MJUMPAGESIZE;
#endif
	while (!powerof2(jumbo_q_size))
		jumbo_q_size--;

	if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
		device_printf(adap->dev,
		    "Insufficient clusters and/or jumbo buffers.\n");

	p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data);

	for (i = 0; i < SGE_QSETS; ++i) {
		struct qset_params *q = p->qset + i;

		if (adap->params.nports > 2) {
			q->coalesce_usecs = 50;
		} else {
#ifdef INVARIANTS
			q->coalesce_usecs = 10;
#else
			q->coalesce_usecs = 5;
#endif
		}
		q->polling = 0;
		q->rspq_size = RSPQ_Q_SIZE;
		q->fl_size = fl_q_size;
		q->jumbo_size = jumbo_q_size;
		q->jumbo_buf_size = jumbo_buf_size;
		q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
		q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16;
		q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE;
		q->cong_thres = 0;
	}
}
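/*
 * Sizing example (editorial; numbers are illustrative): each free list is
 * offered nmbclusters / (3 * nqsets) clusters, rounded down to a power of
 * two and capped at FL_Q_SIZE, so across all queue sets the standard free
 * lists can pin at most about a third of the cluster pool; the jumbo lists
 * are sized the same way against the jumbo-cluster pools.
 */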
int
t3_sge_alloc(adapter_t *sc)
{

	/* The parent tag. */
	if (bus_dma_tag_create( bus_get_dma_tag(sc->dev),/* PCI parent */
				1, 0,			/* algnmnt, boundary */
				BUS_SPACE_MAXADDR,	/* lowaddr */
				BUS_SPACE_MAXADDR,	/* highaddr */
				NULL, NULL,		/* filter, filterarg */
				BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
				BUS_SPACE_UNRESTRICTED, /* nsegments */
				BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
				0,			/* flags */
				NULL, NULL,		/* lock, lockarg */
				&sc->parent_dmat)) {
		device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for normal sized RX frames
	 */
	if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
		MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
		device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for jumbo sized RX frames.
	 */
	if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
		BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
		device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for TX frames.
	 */
	if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
		TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
		NULL, NULL, &sc->tx_dmat)) {
		device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
		return (ENOMEM);
	}

	return (0);
}

int
t3_sge_free(struct adapter * sc)
{

	if (sc->tx_dmat != NULL)
		bus_dma_tag_destroy(sc->tx_dmat);

	if (sc->rx_jumbo_dmat != NULL)
		bus_dma_tag_destroy(sc->rx_jumbo_dmat);

	if (sc->rx_dmat != NULL)
		bus_dma_tag_destroy(sc->rx_dmat);

	if (sc->parent_dmat != NULL)
		bus_dma_tag_destroy(sc->parent_dmat);

	return (0);
}

void
t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
{

	qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
	qs->rspq.polling = 0 /* p->polling */;
}
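/*
 * Unit note (editorial): t3_sge_init() programs A_SG_TIMER_TICK to one
 * tenth of a microsecond, so the multiply by 10 above converts
 * coalesce_usecs into SGE timer ticks; e.g. coalesce_usecs = 5 yields a
 * holdoff timer of 50 ticks (5 us).
 */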
#if !defined(__i386__) && !defined(__amd64__)
static void
refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct refill_fl_cb_arg *cb_arg = arg;

	cb_arg->error = error;
	cb_arg->seg = segs[0];
	cb_arg->nseg = nseg;

}
#endif
/**
 *	refill_fl - refill an SGE free-buffer list
 *	@sc: the controller softc
 *	@q: the free-list to refill
 *	@n: the number of new buffers to allocate
 *
 *	(Re)populate an SGE free-buffer list with up to @n new packet buffers.
 *	The caller must ensure that @n does not exceed the queue's capacity.
 */
static void
refill_fl(adapter_t *sc, struct sge_fl *q, int n)
{
	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
	struct rx_desc *d = &q->desc[q->pidx];
	struct refill_fl_cb_arg cb_arg;
	struct mbuf *m;
	caddr_t cl;
	int err;

	cb_arg.error = 0;
	while (n--) {
		/*
		 * We allocate an uninitialized mbuf + cluster, mbuf is
		 * initialized after rx.
		 */
		if (q->zone == zone_pack) {
			if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
				break;
			cl = m->m_ext.ext_buf;
		} else {
			if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
				break;
			if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
				uma_zfree(q->zone, cl);
				break;
			}
		}
		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
				uma_zfree(q->zone, cl);
				goto done;
			}
			sd->flags |= RX_SW_DESC_MAP_CREATED;
		}
#if !defined(__i386__) && !defined(__amd64__)
		err = bus_dmamap_load(q->entry_tag, sd->map,
		    cl, q->buf_size, refill_fl_cb, &cb_arg, 0);

		if (err != 0 || cb_arg.error) {
			if (q->zone != zone_pack)
				uma_zfree(q->zone, cl);
			m_free(m);
			goto done;
		}
#else
		cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
#endif
		sd->flags |= RX_SW_DESC_INUSE;
		sd->rxsd_cl = cl;
		sd->m = m;
		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
		d->gen2 = htobe32(V_FLD_GEN2(q->gen));

		d++;
		sd++;

		if (++q->pidx == q->size) {
			q->pidx = 0;
			q->gen ^= 1;
			sd = q->sdesc;
			d = q->desc;
		}
		q->credits++;
		q->db_pending++;
	}

done:
	if (q->db_pending >= 32) {
		q->db_pending = 0;
		t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
	}
}


/**
 *	free_rx_bufs - free the Rx buffers on an SGE free list
 *	@sc: the controller softc
 *	@q: the SGE free list to clean up
 *
 *	Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
 *	this queue should be stopped before calling this function.
 */
static void
free_rx_bufs(adapter_t *sc, struct sge_fl *q)
{
	u_int cidx = q->cidx;

	while (q->credits--) {
		struct rx_sw_desc *d = &q->sdesc[cidx];

		if (d->flags & RX_SW_DESC_INUSE) {
			bus_dmamap_unload(q->entry_tag, d->map);
			bus_dmamap_destroy(q->entry_tag, d->map);
			if (q->zone == zone_pack) {
				m_init(d->m, M_NOWAIT, MT_DATA, M_EXT);
				uma_zfree(zone_pack, d->m);
			} else {
				m_init(d->m, M_NOWAIT, MT_DATA, 0);
				uma_zfree(zone_mbuf, d->m);
				uma_zfree(q->zone, d->rxsd_cl);
			}
		}

		d->rxsd_cl = NULL;
		d->m = NULL;
		if (++cidx == q->size)
			cidx = 0;
	}
}

static __inline void
__refill_fl(adapter_t *adap, struct sge_fl *fl)
{
	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
}

static __inline void
__refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
{
	uint32_t refillable = fl->size - fl->credits;

	if (refillable > 0)
		refill_fl(adap, fl, min(max, refillable));
}
/**
 *	recycle_rx_buf - recycle a receive buffer
 *	@adapter: the adapter
 *	@q: the SGE free list
 *	@idx: index of buffer to recycle
 *
 *	Recycles the specified buffer on the given free list by adding it at
 *	the next available slot on the list.
 */
static void
recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
{
	struct rx_desc *from = &q->desc[idx];
	struct rx_desc *to   = &q->desc[q->pidx];

	q->sdesc[q->pidx] = q->sdesc[idx];
	to->addr_lo = from->addr_lo;	// already big endian
	to->addr_hi = from->addr_hi;	// likewise
	wmb();	/* necessary ? */
	to->len_gen = htobe32(V_FLD_GEN1(q->gen));
	to->gen2 = htobe32(V_FLD_GEN2(q->gen));
	q->credits++;

	if (++q->pidx == q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}

static void
alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
	bus_addr_t *addr;

	addr = arg;
	*addr = segs[0].ds_addr;
}

static int
alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
    bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
    bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
{
	size_t len = nelem * elem_size;
	void *s = NULL;
	void *p = NULL;
	int err;

	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
				      BUS_SPACE_MAXADDR_32BIT,
				      BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
				      len, 0, NULL, NULL, tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
		return (ENOMEM);
	}

	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
				    map)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
		return (ENOMEM);
	}

	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
	bzero(p, len);
	*(void **)desc = p;

	if (sw_size) {
		len = nelem * sw_size;
		s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
		*(void **)sdesc = s;
	}
	if (parent_entry_tag == NULL)
		return (0);

	if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
				      BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
				      NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
				      TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
				      NULL, NULL, entry_tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
		return (ENOMEM);
	}
	return (0);
}
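/*
 * Usage sketch for alloc_ring() (editorial; sizes are illustrative).
 * A caller allocating a 256-entry response ring with no per-entry DMA
 * maps would do roughly:
 *
 *	err = alloc_ring(sc, 256, sizeof(struct rsp_desc), 0,
 *	    &q->rspq.phys_addr, &q->rspq.desc, NULL,
 *	    &q->rspq.desc_tag, &q->rspq.desc_map, NULL, NULL);
 *
 * On success *desc points at the zeroed, DMA-loaded ring and *phys holds
 * its bus address; passing a NULL parent_entry_tag skips creation of the
 * per-entry tag used by the Tx and free-list rings.
 */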
static void
sge_slow_intr_handler(void *arg, int ncount)
{
	adapter_t *sc = arg;

	t3_slow_intr_handler(sc);
	t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
}

/**
 *	sge_timer_cb - perform periodic maintenance of an SGE qset
 *	@data: the SGE queue set to maintain
 *
 *	Runs periodically from a timer to perform maintenance of an SGE queue
 *	set.  It performs the following tasks:
 *
 *	a) Cleans up any completed Tx descriptors that may still be pending.
 *	Normal descriptor cleanup happens when new packets are added to a Tx
 *	queue so this timer is relatively infrequent and does any cleanup only
 *	if the Tx queue has not seen any new packets in a while.  We make a
 *	best effort attempt to reclaim descriptors, in that we don't wait
 *	around if we cannot get a queue's lock (which most likely is because
 *	someone else is queueing new packets and so will also handle the clean
 *	up).  Since control queues use immediate data exclusively we don't
 *	bother cleaning them up here.
 *
 *	b) Replenishes Rx queues that have run out due to memory shortage.
 *	Normally new Rx buffers are added when existing ones are consumed but
 *	when out of memory a queue can become empty.  We try to add only a few
 *	buffers here, the queue will be replenished fully as these new buffers
 *	are used up if memory shortage has subsided.
 *
 *	c) Return coalesced response queue credits in case a response queue is
 *	starved.
 *
 *	d) Ring doorbells for T304 tunnel queues since we have seen doorbell
 *	fifo overflows and the FW doesn't implement any recovery scheme yet.
 */
static void
sge_timer_cb(void *arg)
{
	adapter_t *sc = arg;
	if ((sc->flags & USING_MSIX) == 0) {

		struct port_info *pi;
		struct sge_qset *qs;
		struct sge_txq  *txq;
		int i, j;
		int reclaim_ofl, refill_rx;

		if (sc->open_device_map == 0)
			return;

		for (i = 0; i < sc->params.nports; i++) {
			pi = &sc->port[i];
			for (j = 0; j < pi->nqsets; j++) {
				qs = &sc->sge.qs[pi->first_qset + j];
				txq = &qs->txq[0];
				reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
				refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
				    (qs->fl[1].credits < qs->fl[1].size));
				if (reclaim_ofl || refill_rx) {
					taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
					break;
				}
			}
		}
	}

	if (sc->params.nports > 2) {
		int i;

		for_each_port(sc, i) {
			struct port_info *pi = &sc->port[i];

			t3_write_reg(sc, A_SG_KDOORBELL,
				     F_SELEGRCNTX |
				     (FW_TUNNEL_SGEEC_START + pi->first_qset));
		}
	}
	if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
	    sc->open_device_map != 0)
		callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
}

/*
 * This is meant to be a catch-all function to keep sge state private
 * to sge.c
 */
int
t3_sge_init_adapter(adapter_t *sc)
{
	callout_init(&sc->sge_timer_ch, 1);
	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
	TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
	return (0);
}

int
t3_sge_reset_adapter(adapter_t *sc)
{
	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
	return (0);
}

int
t3_sge_init_port(struct port_info *pi)
{
	TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
	return (0);
}

/**
 *	refill_rspq - replenish an SGE response queue
 *	@sc: the adapter
 *	@q: the response queue to replenish
 *	@credits: how many new responses to make available
 *
 *	Replenishes a response queue by making the supplied number of responses
 *	available to HW.
 */
static __inline void
refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
{

	/* mbufs are allocated on demand when a rspq entry is processed. */
	t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
		     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
}

static void
sge_txq_reclaim_handler(void *arg, int ncount)
{
	struct sge_qset *qs = arg;
	int i;

	for (i = 0; i < 3; i++)
		reclaim_completed_tx(qs, 16, i);
}
static void
sge_timer_reclaim(void *arg, int ncount)
{
	struct port_info *pi = arg;
	int i, nqsets = pi->nqsets;
	adapter_t *sc = pi->adapter;
	struct sge_qset *qs;
	struct mtx *lock;

	KASSERT((sc->flags & USING_MSIX) == 0,
	    ("can't call timer reclaim for msi-x"));

	for (i = 0; i < nqsets; i++) {
		qs = &sc->sge.qs[pi->first_qset + i];

		reclaim_completed_tx(qs, 16, TXQ_OFLD);
		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
			    &sc->sge.qs[0].rspq.lock;

		if (mtx_trylock(lock)) {
			/* XXX currently assume that we are *NOT* polling */
			uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);

			if (qs->fl[0].credits < qs->fl[0].size - 16)
				__refill_fl(sc, &qs->fl[0]);
			if (qs->fl[1].credits < qs->fl[1].size - 16)
				__refill_fl(sc, &qs->fl[1]);

			if (status & (1 << qs->rspq.cntxt_id)) {
				if (qs->rspq.credits) {
					refill_rspq(sc, &qs->rspq, 1);
					qs->rspq.credits--;
					t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
					    1 << qs->rspq.cntxt_id);
				}
			}
			mtx_unlock(lock);
		}
	}
}

/**
 *	init_qset_cntxt - initialize an SGE queue set context info
 *	@qs: the queue set
 *	@id: the queue set id
 *
 *	Initializes the TIDs and context ids for the queues of a queue set.
 */
static void
init_qset_cntxt(struct sge_qset *qs, u_int id)
{

	qs->rspq.cntxt_id = id;
	qs->fl[0].cntxt_id = 2 * id;
	qs->fl[1].cntxt_id = 2 * id + 1;
	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;

	/* XXX: a sane limit is needed instead of INT_MAX */
	mbufq_init(&qs->txq[TXQ_ETH].sendq, INT_MAX);
	mbufq_init(&qs->txq[TXQ_OFLD].sendq, INT_MAX);
	mbufq_init(&qs->txq[TXQ_CTRL].sendq, INT_MAX);
}


static void
txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
{
	txq->in_use += ndesc;
	/*
	 * XXX we don't handle stopping of queue
	 * presumably start handles this when we bump against the end
	 */
	txqs->gen = txq->gen;
	txq->unacked += ndesc;
	txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
	txq->unacked &= 31;
	txqs->pidx = txq->pidx;
	txq->pidx += ndesc;
#ifdef INVARIANTS
	if (((txqs->pidx > txq->cidx) &&
	     (txq->pidx < txqs->pidx) &&
	     (txq->pidx >= txq->cidx)) ||
	    ((txqs->pidx < txq->cidx) &&
	     (txq->pidx >= txq->cidx)) ||
	    ((txqs->pidx < txq->cidx) &&
	     (txq->cidx < txqs->pidx)))
		panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
		      txqs->pidx, txq->pidx, txq->cidx);
#endif
	if (txq->pidx >= txq->size) {
		txq->pidx -= txq->size;
		txq->gen ^= 1;
	}

}

/**
 *	calc_tx_descs - calculate the number of Tx descriptors for a packet
 *	@m: the packet mbufs
 *	@nsegs: the number of segments
 *
 *	Returns the number of Tx descriptors needed for the given Ethernet
 *	packet.  Ethernet packets require addition of WR and CPL headers.
 */
static __inline unsigned int
calc_tx_descs(const struct mbuf *m, int nsegs)
{
	unsigned int flits;

	if (m->m_pkthdr.len <= PIO_LEN)
		return 1;

	flits = sgl_len(nsegs) + 2;
	if (m->m_pkthdr.csum_flags & CSUM_TSO)
		flits++;

	return flits_to_desc(flits);
}
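/*
 * Worked example (editorial): a non-TSO packet mapped to 4 DMA segments
 * needs sgl_len(4) + 2 = 8 flits (one flit of WR header, one of CPL_TX_PKT
 * header, six of SGL), which flit_desc_map[] folds into a single
 * descriptor; packets of at most PIO_LEN bytes skip the SGL entirely and
 * always fit in one descriptor as immediate data.
 */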
/**
 *	make_sgl - populate a scatter/gather list for a packet
 *	@sgp: the SGL to populate
 *	@segs: the packet dma segments
 *	@nsegs: the number of segments
 *
 *	Generates a scatter/gather list for the buffers that make up a packet.
 *	The caller must size the SGL appropriately.
 */
static __inline void
make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
{
	int i, idx;

	for (idx = 0, i = 0; i < nsegs; i++) {
		/*
		 * firmware doesn't like empty segments
		 */
		if (segs[i].ds_len == 0)
			continue;
		if (i && idx == 0)
			++sgp;

		sgp->len[idx] = htobe32(segs[i].ds_len);
		sgp->addr[idx] = htobe64(segs[i].ds_addr);
		idx ^= 1;
	}

	if (idx) {
		sgp->len[idx] = 0;
		sgp->addr[idx] = 0;
	}
}

/**
 *	check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 *	@adap: the adapter
 *	@q: the Tx queue
 *	@mustring: ring the doorbell even if few descriptor writes are pending
 *
 *	Ring the doorbell if a Tx queue is asleep.  There is a natural race,
 *	where the HW is going to sleep just after we checked, however,
 *	then the interrupt handler will detect the outstanding TX packet
 *	and ring the doorbell for us.
 *
 *	When GTS is disabled we ring the doorbell whenever the caller insists
 *	or at least 32 descriptor writes have accumulated.
 */
static __inline void
check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring)
{
#if USE_GTS
	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
		set_bit(TXQ_LAST_PKT_DB, &q->flags);
#ifdef T3_TRACE
		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
			  q->cntxt_id);
#endif
		t3_write_reg(adap, A_SG_KDOORBELL,
			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	}
#else
	if (mustring || ++q->db_pending >= 32) {
		wmb();	/* write descriptors before telling HW */
		t3_write_reg(adap, A_SG_KDOORBELL,
			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
		q->db_pending = 0;
	}
#endif
}

static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
#endif
}
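/*
 * Generation-bit note (editorial): with SGE_NUM_GENBITS == 2 the last
 * flit of each descriptor carries a second copy of the queue's current
 * generation bit (the first lives in the WR header's V_WR_GEN field, and
 * the bit flips every time the producer index wraps).  wr_gen2() writes
 * that flit last, which is what lets the SGE distinguish freshly written
 * descriptors from stale ones left over from the previous pass around
 * the ring.
 */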
/**
 *	write_wr_hdr_sgl - write a WR header and, optionally, SGL
 *	@ndesc: number of Tx descriptors spanned by the SGL
 *	@txd: first Tx descriptor to be written
 *	@txqs: txq state (generation and producer index)
 *	@txq: the SGE Tx queue
 *	@sgl: the SGL
 *	@flits: number of flits to the start of the SGL in the first descriptor
 *	@sgl_flits: the SGL size in flits
 *	@wr_hi: top 32 bits of WR header based on WR type (big endian)
 *	@wr_lo: low 32 bits of WR header based on WR type (big endian)
 *
 *	Write a work request header and an associated SGL.  If the SGL is
 *	small enough to fit into one Tx descriptor it has already been written
 *	and we just need to write the WR header.  Otherwise we distribute the
 *	SGL across the number of descriptors it spans.
 */
static void
write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
    const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
    unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
{

	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
	struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];

	if (__predict_true(ndesc == 1)) {
		set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi,
		    htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) |
		    wr_lo);

		wr_gen2(txd, txqs->gen);

	} else {
		unsigned int ogen = txqs->gen;
		const uint64_t *fp = (const uint64_t *)sgl;
		struct work_request_hdr *wp = wrp;

		wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi;

		while (sgl_flits) {
			unsigned int avail = WR_FLITS - flits;

			if (avail > sgl_flits)
				avail = sgl_flits;
			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
			sgl_flits -= avail;
			ndesc--;
			if (!sgl_flits)
				break;

			fp += avail;
			txd++;
			txsd++;
			if (++txqs->pidx == txq->size) {
				txqs->pidx = 0;
				txqs->gen ^= 1;
				txd = txq->desc;
				txsd = txq->sdesc;
			}

			/*
			 * when the head of the mbuf chain
			 * is freed all clusters will be freed
			 * with it
			 */
			wrp = (struct work_request_hdr *)txd;
			wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
			    V_WR_SGLSFLT(1)) | wr_hi;
			wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
			    sgl_flits + 1)) |
			    V_WR_GEN(txqs->gen)) | wr_lo;
			wr_gen2(txd, txqs->gen);
			flits = 1;
		}
		wrp->wrh_hi |= htonl(F_WR_EOP);
		wmb();
		wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
		wr_gen2((struct tx_desc *)wp, ogen);
	}
}

/* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */
#define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20)

#define GET_VTAG(cntrl, m) \
do { \
	if ((m)->m_flags & M_VLANTAG) \
		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
} while (0)
static int
t3_encap(struct sge_qset *qs, struct mbuf **m)
{
	adapter_t *sc;
	struct mbuf *m0;
	struct sge_txq *txq;
	struct txq_state txqs;
	struct port_info *pi;
	unsigned int ndesc, flits, cntrl, mlen;
	int err, nsegs, tso_info = 0;

	struct work_request_hdr *wrp;
	struct tx_sw_desc *txsd;
	struct sg_ent *sgp, *sgl;
	uint32_t wr_hi, wr_lo, sgl_flits;
	bus_dma_segment_t segs[TX_MAX_SEGS];

	struct tx_desc *txd;

	pi = qs->port;
	sc = pi->adapter;
	txq = &qs->txq[TXQ_ETH];
	txd = &txq->desc[txq->pidx];
	txsd = &txq->sdesc[txq->pidx];
	sgl = txq->txq_sgl;

	prefetch(txd);
	m0 = *m;

	mtx_assert(&qs->lock, MA_OWNED);
	cntrl = V_TXPKT_INTF(pi->txpkt_intf);
	KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));

	if (m0->m_nextpkt == NULL && m0->m_next != NULL &&
	    m0->m_pkthdr.csum_flags & (CSUM_TSO))
		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);

	if (m0->m_nextpkt != NULL) {
		busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
		ndesc = 1;
		mlen = 0;
	} else {
		if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
		    &m0, segs, &nsegs))) {
			if (cxgb_debug)
				printf("failed ... err=%d\n", err);
			return (err);
		}
		mlen = m0->m_pkthdr.len;
		ndesc = calc_tx_descs(m0, nsegs);
	}
	txq_prod(txq, ndesc, &txqs);

	KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
	txsd->m = m0;

	if (m0->m_nextpkt != NULL) {
		struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
		int i, fidx;

		if (nsegs > 7)
			panic("trying to coalesce %d packets in to one WR", nsegs);
		txq->txq_coalesced += nsegs;
		wrp = (struct work_request_hdr *)txd;
		flits = nsegs*2 + 1;

		for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
			struct cpl_tx_pkt_batch_entry *cbe;
			uint64_t flit;
			uint32_t *hflit = (uint32_t *)&flit;
			int cflags = m0->m_pkthdr.csum_flags;

			cntrl = V_TXPKT_INTF(pi->txpkt_intf);
			GET_VTAG(cntrl, m0);
			cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
			if (__predict_false(!(cflags & CSUM_IP)))
				cntrl |= F_TXPKT_IPCSUM_DIS;
			if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP |
			    CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
				cntrl |= F_TXPKT_L4CSUM_DIS;

			hflit[0] = htonl(cntrl);
			hflit[1] = htonl(segs[i].ds_len | 0x80000000);
			flit |= htobe64(1 << 24);
			cbe = &cpl_batch->pkt_entry[i];
			cbe->cntrl = hflit[0];
			cbe->len = hflit[1];
			cbe->addr = htobe64(segs[i].ds_addr);
		}

		wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) |
		    htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
		wr_lo = htonl(V_WR_LEN(flits) |
		    V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
		set_wr_hdr(wrp, wr_hi, wr_lo);
		wmb();
		ETHER_BPF_MTAP(pi->ifp, m0);
		wr_gen2(txd, txqs.gen);
		check_ring_tx_db(sc, txq, 0);
		return (0);
	} else if (tso_info) {
		uint16_t eth_type;
		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
		struct ether_header *eh;
		void *l3hdr;
		struct tcphdr *tcp;

		txd->flit[2] = 0;
		GET_VTAG(cntrl, m0);
		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
		hdr->cntrl = htonl(cntrl);
		hdr->len = htonl(mlen | 0x80000000);

		if (__predict_false(mlen < TCPPKTHDRSIZE)) {
			printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%b,flags=%#x",
			    m0, mlen, m0->m_pkthdr.tso_segsz,
			    (int)m0->m_pkthdr.csum_flags, CSUM_BITS, m0->m_flags);
			panic("tx tso packet too small");
		}

		/* Make sure that ether, ip, tcp headers are all in m0 */
		if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
			m0 = m_pullup(m0, TCPPKTHDRSIZE);
			if (__predict_false(m0 == NULL)) {
				/* XXX panic probably an overreaction */
				panic("couldn't fit header into mbuf");
			}
		}

		eh = mtod(m0, struct ether_header *);
		eth_type = eh->ether_type;
		if (eth_type == htons(ETHERTYPE_VLAN)) {
			struct ether_vlan_header *evh = (void *)eh;

			tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II_VLAN);
			l3hdr = evh + 1;
			eth_type = evh->evl_proto;
		} else {
			tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II);
			l3hdr = eh + 1;
		}

		if (eth_type == htons(ETHERTYPE_IP)) {
			struct ip *ip = l3hdr;

			tso_info |= V_LSO_IPHDR_WORDS(ip->ip_hl);
			tcp = (struct tcphdr *)(ip + 1);
		} else if (eth_type == htons(ETHERTYPE_IPV6)) {
			struct ip6_hdr *ip6 = l3hdr;

			KASSERT(ip6->ip6_nxt == IPPROTO_TCP,
			    ("%s: CSUM_TSO with ip6_nxt %d",
			    __func__, ip6->ip6_nxt));

			tso_info |= F_LSO_IPV6;
			tso_info |= V_LSO_IPHDR_WORDS(sizeof(*ip6) >> 2);
			tcp = (struct tcphdr *)(ip6 + 1);
		} else
			panic("%s: CSUM_TSO but neither ip nor ip6", __func__);

		tso_info |= V_LSO_TCPHDR_WORDS(tcp->th_off);
		hdr->lso_info = htonl(tso_info);
		if (__predict_false(mlen <= PIO_LEN)) {
			/*
			 * pkt not undersized but fits in PIO_LEN
			 * Indicates a TSO bug at the higher levels.
			 */
			txsd->m = NULL;
			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
			flits = (mlen + 7) / 8 + 3;
			wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
			    V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
			    F_WR_SOP | F_WR_EOP | txqs.compl);
			wr_lo = htonl(V_WR_LEN(flits) |
			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
			set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
			wmb();
			ETHER_BPF_MTAP(pi->ifp, m0);
			wr_gen2(txd, txqs.gen);
			check_ring_tx_db(sc, txq, 0);
			m_freem(m0);
			return (0);
		}
		flits = 3;
	} else {
		struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;

		GET_VTAG(cntrl, m0);
		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
		if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
			cntrl |= F_TXPKT_IPCSUM_DIS;
		if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP |
		    CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
			cntrl |= F_TXPKT_L4CSUM_DIS;
		cpl->cntrl = htonl(cntrl);
		cpl->len = htonl(mlen | 0x80000000);

		if (mlen <= PIO_LEN) {
			txsd->m = NULL;
			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
			flits = (mlen + 7) / 8 + 2;

			wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
			    V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
			    F_WR_SOP | F_WR_EOP | txqs.compl);
			wr_lo = htonl(V_WR_LEN(flits) |
			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
			set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
			wmb();
			ETHER_BPF_MTAP(pi->ifp, m0);
			wr_gen2(txd, txqs.gen);
			check_ring_tx_db(sc, txq, 0);
			m_freem(m0);
			return (0);
		}
		flits = 2;
	}
	wrp = (struct work_request_hdr *)txd;
	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
	make_sgl(sgp, segs, nsegs);

	sgl_flits = sgl_len(nsegs);

	ETHER_BPF_MTAP(pi->ifp, m0);

	KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
	wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
	wr_lo = htonl(V_WR_TID(txq->token));
	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
	    sgl_flits, wr_hi, wr_lo);
	check_ring_tx_db(sc, txq, 0);

	return (0);
}

#ifdef NETDUMP
int
cxgb_netdump_encap(struct sge_qset *qs, struct mbuf **m)
{
	int error;

	error = t3_encap(qs, m);
	if (error == 0)
		check_ring_tx_db(qs->port->adapter, &qs->txq[TXQ_ETH], 1);
	else if (*m != NULL) {
		m_freem(*m);
		*m = NULL;
	}
	return (error);
}
#endif

void
cxgb_tx_watchdog(void *arg)
{
	struct sge_qset *qs = arg;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];

	if (qs->coalescing != 0 &&
	    (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
	    TXQ_RING_EMPTY(qs))
		qs->coalescing = 0;
	else if (qs->coalescing == 0 &&
	    (txq->in_use >= cxgb_tx_coalesce_enable_start))
		qs->coalescing = 1;
	if (TXQ_TRYLOCK(qs)) {
		qs->qs_flags |= QS_FLUSHING;
		cxgb_start_locked(qs);
		qs->qs_flags &= ~QS_FLUSHING;
		TXQ_UNLOCK(qs);
	}
	if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
		callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
		    qs, txq->txq_watchdog.c_cpu);
}

static void
cxgb_tx_timeout(void *arg)
{
	struct sge_qset *qs = arg;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];

	if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
		qs->coalescing = 1;
	if (TXQ_TRYLOCK(qs)) {
		qs->qs_flags |= QS_TIMEOUT;
		cxgb_start_locked(qs);
		qs->qs_flags &= ~QS_TIMEOUT;
		TXQ_UNLOCK(qs);
	}
}
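/*
 * Callout note (editorial): cxgb_tx_watchdog() is the slow, periodic
 * flush that reruns every hz/4 ticks while the interface is up, whereas
 * cxgb_tx_timeout() is a one-tick deferral armed only when packets are
 * left sitting in the buf_ring; both funnel into cxgb_start_locked().
 */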
static void
cxgb_start_locked(struct sge_qset *qs)
{
	struct mbuf *m_head = NULL;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];
	struct port_info *pi = qs->port;
	struct ifnet *ifp = pi->ifp;

	if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
		reclaim_completed_tx(qs, 0, TXQ_ETH);

	if (!pi->link_config.link_ok) {
		TXQ_RING_FLUSH(qs);
		return;
	}
	TXQ_LOCK_ASSERT(qs);
	while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
	    pi->link_config.link_ok) {
		reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);

		if (txq->size - txq->in_use <= TX_MAX_DESC)
			break;

		if ((m_head = cxgb_dequeue(qs)) == NULL)
			break;
		/*
		 * Encapsulation can modify our pointer, and/or make it
		 * NULL on failure.  In that event, we can't requeue.
		 */
		if (t3_encap(qs, &m_head) || m_head == NULL)
			break;

		m_head = NULL;
	}

	if (txq->db_pending)
		check_ring_tx_db(pi->adapter, txq, 1);

	if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
	    pi->link_config.link_ok)
		callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
		    qs, txq->txq_timer.c_cpu);
	if (m_head != NULL)
		m_freem(m_head);
}

static int
cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
{
	struct port_info *pi = qs->port;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];
	struct buf_ring *br = txq->txq_mr;
	int error, avail;

	avail = txq->size - txq->in_use;
	TXQ_LOCK_ASSERT(qs);

	/*
	 * We can only do a direct transmit if the following are true:
	 * - we aren't coalescing (ring < 3/4 full)
	 * - the link is up -- checked in caller
	 * - there are no packets enqueued already
	 * - there is space in hardware transmit queue
	 */
	if (check_pkt_coalesce(qs) == 0 &&
	    !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) {
		if (t3_encap(qs, &m)) {
			if (m != NULL &&
			    (error = drbr_enqueue(ifp, br, m)) != 0)
				return (error);
		} else {
			if (txq->db_pending)
				check_ring_tx_db(pi->adapter, txq, 1);

			/*
			 * We've bypassed the buf ring so we need to update
			 * the stats directly
			 */
			txq->txq_direct_packets++;
			txq->txq_direct_bytes += m->m_pkthdr.len;
		}
	} else if ((error = drbr_enqueue(ifp, br, m)) != 0)
		return (error);

	reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
	if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
	    (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
		cxgb_start_locked(qs);
	else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
		callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
		    qs, txq->txq_timer.c_cpu);
	return (0);
}

int
cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct sge_qset *qs;
	struct port_info *pi = ifp->if_softc;
	int error, qidx = pi->first_qset;

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
	    (!pi->link_config.link_ok)) {
		m_freem(m);
		return (0);
	}

	/* check if flowid is set */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;

	qs = &pi->adapter->sge.qs[qidx];

	if (TXQ_TRYLOCK(qs)) {
		/* XXX running */
		error = cxgb_transmit_locked(ifp, qs, m);
		TXQ_UNLOCK(qs);
	} else
		error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
	return (error);
}

void
cxgb_qflush(struct ifnet *ifp)
{
	/*
	 * flush any enqueued mbufs in the buf_rings
	 * and in the transmit queues
	 * no-op for now
	 */
	return;
}
/**
 *	write_imm - write a packet into a Tx descriptor as immediate data
 *	@d: the Tx descriptor to write
 *	@src: the packet
 *	@len: the length of packet data to write as immediate data
 *	@gen: the generation bit value to write
 *
 *	Writes a packet as immediate data into a Tx descriptor.  The packet
 *	contains a work request at its beginning.  We must write the packet
 *	carefully so the SGE doesn't read it accidentally before it's written
 *	in its entirety.
 */
static __inline void
write_imm(struct tx_desc *d, caddr_t src,
	  unsigned int len, unsigned int gen)
{
	struct work_request_hdr *from = (struct work_request_hdr *)src;
	struct work_request_hdr *to = (struct work_request_hdr *)d;
	uint32_t wr_hi, wr_lo;

	KASSERT(len <= WR_LEN && len >= sizeof(*from),
	    ("%s: invalid len %d", __func__, len));

	memcpy(&to[1], &from[1], len - sizeof(*from));
	wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
	    V_WR_BCNTLFLT(len & 7));
	wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8));
	set_wr_hdr(to, wr_hi, wr_lo);
	wmb();
	wr_gen2(d, gen);
}

/**
 *	check_desc_avail - check descriptor availability on a send queue
 *	@adap: the adapter
 *	@q: the TX queue
 *	@m: the packet needing the descriptors
 *	@ndesc: the number of Tx descriptors needed
 *	@qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 *
 *	Checks if the requested number of Tx descriptors is available on an
 *	SGE send queue.  If the queue is already suspended or not enough
 *	descriptors are available the packet is queued for later transmission.
 *	Must be called with the Tx queue locked.
 *
 *	Returns 0 if enough descriptors are available, 1 if there aren't
 *	enough descriptors and the packet has been queued, and 2 if the caller
 *	needs to retry because there weren't enough descriptors at the
 *	beginning of the call but some freed up in the meantime.
 */
static __inline int
check_desc_avail(adapter_t *adap, struct sge_txq *q,
		 struct mbuf *m, unsigned int ndesc,
		 unsigned int qid)
{
	/*
	 * XXX We currently only use this for checking the control queue
	 * the control queue is only used for binding qsets which happens
	 * at init time so we are guaranteed enough descriptors
	 */
	if (__predict_false(mbufq_len(&q->sendq))) {
addq_exit:	(void)mbufq_enqueue(&q->sendq, m);
		return 1;
	}
	if (__predict_false(q->size - q->in_use < ndesc)) {

		struct sge_qset *qs = txq_to_qset(q, qid);

		setbit(&qs->txq_stopped, qid);
		if (should_restart_tx(q) &&
		    test_and_clear_bit(qid, &qs->txq_stopped))
			return 2;

		q->stops++;
		goto addq_exit;
	}
	return 0;
}


/**
 *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 *	@q: the SGE control Tx queue
 *
 *	This is a variant of reclaim_completed_tx() that is used for Tx queues
 *	that send only immediate data (presently just the control queues) and
 *	thus do not have any mbufs to free.
 */
static __inline void
reclaim_completed_tx_imm(struct sge_txq *q)
{
	unsigned int reclaim = q->processed - q->cleaned;

	q->in_use -= reclaim;
	q->cleaned += reclaim;
}
/**
 *	ctrl_xmit - send a packet through an SGE control Tx queue
 *	@adap: the adapter
 *	@qs: the queue set containing the control queue
 *	@m: the packet
 *
 *	Send a packet through an SGE control Tx queue.  Packets sent through
 *	a control queue must fit entirely as immediate data in a single Tx
 *	descriptor and have no page fragments.
 */
static int
ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
{
	int ret;
	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
	struct sge_txq *q = &qs->txq[TXQ_CTRL];

	KASSERT(m->m_len <= WR_LEN, ("%s: bad tx data", __func__));

	wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
	wrp->wrh_lo = htonl(V_WR_TID(q->token));

	TXQ_LOCK(qs);
again:	reclaim_completed_tx_imm(q);

	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
	if (__predict_false(ret)) {
		if (ret == 1) {
			TXQ_UNLOCK(qs);
			return (ENOSPC);
		}
		goto again;
	}
	write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);

	q->in_use++;
	if (++q->pidx >= q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	TXQ_UNLOCK(qs);
	wmb();
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));

	m_free(m);
	return (0);
}


/**
 *	restart_ctrlq - restart a suspended control queue
 *	@qs: the queue set containing the control queue
 *
 *	Resumes transmission on a suspended Tx control queue.
 */
static void
restart_ctrlq(void *data, int npending)
{
	struct mbuf *m;
	struct sge_qset *qs = (struct sge_qset *)data;
	struct sge_txq *q = &qs->txq[TXQ_CTRL];
	adapter_t *adap = qs->port->adapter;

	TXQ_LOCK(qs);
again:	reclaim_completed_tx_imm(q);

	while (q->in_use < q->size &&
	       (m = mbufq_dequeue(&q->sendq)) != NULL) {

		write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
		m_free(m);

		if (++q->pidx >= q->size) {
			q->pidx = 0;
			q->gen ^= 1;
		}
		q->in_use++;
	}
	if (mbufq_len(&q->sendq)) {
		setbit(&qs->txq_stopped, TXQ_CTRL);

		if (should_restart_tx(q) &&
		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
			goto again;
		q->stops++;
	}
	TXQ_UNLOCK(qs);
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}


/*
 * Send a management message through control queue 0
 */
int
t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
{
	return ctrl_xmit(adap, &adap->sge.qs[0], m);
}
1998 */ 1999static void 2000t3_free_qset(adapter_t *sc, struct sge_qset *q) 2001{ 2002 int i; 2003 2004 reclaim_completed_tx(q, 0, TXQ_ETH); 2005 if (q->txq[TXQ_ETH].txq_mr != NULL) 2006 buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF); 2007 if (q->txq[TXQ_ETH].txq_ifq != NULL) { 2008 ifq_delete(q->txq[TXQ_ETH].txq_ifq); 2009 free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF); 2010 } 2011 2012 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2013 if (q->fl[i].desc) { 2014 mtx_lock_spin(&sc->sge.reg_lock); 2015 t3_sge_disable_fl(sc, q->fl[i].cntxt_id); 2016 mtx_unlock_spin(&sc->sge.reg_lock); 2017 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map); 2018 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc, 2019 q->fl[i].desc_map); 2020 bus_dma_tag_destroy(q->fl[i].desc_tag); 2021 bus_dma_tag_destroy(q->fl[i].entry_tag); 2022 } 2023 if (q->fl[i].sdesc) { 2024 free_rx_bufs(sc, &q->fl[i]); 2025 free(q->fl[i].sdesc, M_DEVBUF); 2026 } 2027 } 2028 2029 mtx_unlock(&q->lock); 2030 MTX_DESTROY(&q->lock); 2031 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 2032 if (q->txq[i].desc) { 2033 mtx_lock_spin(&sc->sge.reg_lock); 2034 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0); 2035 mtx_unlock_spin(&sc->sge.reg_lock); 2036 bus_dmamap_unload(q->txq[i].desc_tag, 2037 q->txq[i].desc_map); 2038 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc, 2039 q->txq[i].desc_map); 2040 bus_dma_tag_destroy(q->txq[i].desc_tag); 2041 bus_dma_tag_destroy(q->txq[i].entry_tag); 2042 } 2043 if (q->txq[i].sdesc) { 2044 free(q->txq[i].sdesc, M_DEVBUF); 2045 } 2046 } 2047 2048 if (q->rspq.desc) { 2049 mtx_lock_spin(&sc->sge.reg_lock); 2050 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id); 2051 mtx_unlock_spin(&sc->sge.reg_lock); 2052 2053 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map); 2054 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc, 2055 q->rspq.desc_map); 2056 bus_dma_tag_destroy(q->rspq.desc_tag); 2057 MTX_DESTROY(&q->rspq.lock); 2058 } 2059 2060#if defined(INET6) || defined(INET) 2061 tcp_lro_free(&q->lro.ctrl); 2062#endif 2063 2064 bzero(q, sizeof(*q)); 2065} 2066 2067/** 2068 * t3_free_sge_resources - free SGE resources 2069 * @sc: the adapter softc 2070 * 2071 * Frees resources used by the SGE queue sets. 2072 */ 2073void 2074t3_free_sge_resources(adapter_t *sc, int nqsets) 2075{ 2076 int i; 2077 2078 for (i = 0; i < nqsets; ++i) { 2079 TXQ_LOCK(&sc->sge.qs[i]); 2080 t3_free_qset(sc, &sc->sge.qs[i]); 2081 } 2082} 2083 2084/** 2085 * t3_sge_start - enable SGE 2086 * @sc: the controller softc 2087 * 2088 * Enables the SGE for DMAs. This is the last step in starting packet 2089 * transfers. 2090 */ 2091void 2092t3_sge_start(adapter_t *sc) 2093{ 2094 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE); 2095} 2096 2097/** 2098 * t3_sge_stop - disable SGE operation 2099 * @sc: the adapter 2100 * 2101 * Disables the DMA engine. This can be called in emergencies (e.g., 2102 * from error interrupts) or from normal process context. In the latter 2103 * case it also disables any pending queue restart tasklets. Note that 2104 * if it is called in interrupt context it cannot disable the restart 2105 * tasklets as it cannot wait; however, the tasklets will have no effect 2106 * since the doorbells are disabled and the driver will call this again 2107 * later from process context, at which time the tasklets will be stopped 2108 * if they are still running.
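 *
 * Concretely, stopping amounts to clearing F_GLOBALENABLE in the
 * A_SG_CONTROL register; the drain of the per-queue-set restart tasks
 * remains compiled out under "#ifdef notyet" in the body below.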
2109 */ 2110void 2111t3_sge_stop(adapter_t *sc) 2112{ 2113 int i, nqsets; 2114 2115 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0); 2116 2117 if (sc->tq == NULL) 2118 return; 2119 2120 for (nqsets = i = 0; i < sc->params.nports; i++) 2121 nqsets += sc->port[i].nqsets; 2122#ifdef notyet 2123 /* 2124 * XXX: drain the queue-restart tasks once it is 2125 * safe to do so here. 2126 */ 2127 for (i = 0; i < nqsets; ++i) { 2128 struct sge_qset *qs = &sc->sge.qs[i]; 2129 2130 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2131 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2132 } 2133#endif 2134} 2135 2136/** 2137 * t3_free_tx_desc - reclaims Tx descriptors and their buffers 2138 * @qs: the queue set that owns the Tx queue 2139 * @reclaimable: the number of descriptors to reclaim 2140 * @queue: the index of the Tx queue within the queue set (TXQ_ETH, 2141 * TXQ_OFLD, or TXQ_CTRL) 2142 * 2143 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated 2144 * Tx buffers. Called with the Tx queue lock held. Descriptors that 2145 * carry no mbuf are counted in txq_skipped. 2146 * 2147 * The queue's cidx is advanced past all reclaimed descriptors. 2148 */ 2149void 2150t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue) 2151{ 2152 struct tx_sw_desc *txsd; 2153 unsigned int cidx, mask; 2154 struct sge_txq *q = &qs->txq[queue]; 2155 2156#ifdef T3_TRACE 2157 T3_TRACE2(sc->tb[q->cntxt_id & 7], 2158 "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx); 2159#endif 2160 cidx = q->cidx; 2161 mask = q->size - 1; 2162 txsd = &q->sdesc[cidx]; 2163 2164 mtx_assert(&qs->lock, MA_OWNED); 2165 while (reclaimable--) { 2166 prefetch(q->sdesc[(cidx + 1) & mask].m); 2167 prefetch(q->sdesc[(cidx + 2) & mask].m); 2168 2169 if (txsd->m != NULL) { 2170 if (txsd->flags & TX_SW_DESC_MAPPED) { 2171 bus_dmamap_unload(q->entry_tag, txsd->map); 2172 txsd->flags &= ~TX_SW_DESC_MAPPED; 2173 } 2174 m_freem_list(txsd->m); 2175 txsd->m = NULL; 2176 } else 2177 q->txq_skipped++; 2178 2179 ++txsd; 2180 if (++cidx == q->size) { 2181 cidx = 0; 2182 txsd = q->sdesc; 2183 } 2184 } 2185 q->cidx = cidx; 2186 2187} 2188 2189/** 2190 * is_new_response - check if a response is newly written 2191 * @r: the response descriptor 2192 * @q: the response queue 2193 * 2194 * Returns true if a response descriptor contains a yet unprocessed 2195 * response. 2196 */ 2197static __inline int 2198is_new_response(const struct rsp_desc *r, 2199 const struct sge_rspq *q) 2200{ 2201 return (r->intr_gen & F_RSPD_GEN2) == q->gen; 2202} 2203 2204#define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) 2205#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ 2206 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ 2207 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ 2208 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) 2209 2210/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */ 2211#define NOMEM_INTR_DELAY 2500 2212 2213#ifdef TCP_OFFLOAD 2214/** 2215 * write_ofld_wr - write an offload work request 2216 * @adap: the adapter 2217 * @m: the packet to send 2218 * @q: the Tx queue 2219 * @pidx: index of the first Tx descriptor to write 2220 * @gen: the generation value to use 2221 * @ndesc: number of descriptors the packet will occupy 2222 * 2223 * Write an offload work request to send the supplied packet. The packet 2224 * data already carry the work request with most fields populated.
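 *
 * Two cases are handled below: without F_HDR_SGL the WR is written into
 * the descriptor as immediate data via write_imm(); with F_HDR_SGL the
 * payload is described by the sglist attached to the ofld_hdr and is
 * emitted through write_wr_hdr_sgl().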
2225 */ 2226static void 2227write_ofld_wr(adapter_t *adap, struct mbuf *m, struct sge_txq *q, 2228 unsigned int pidx, unsigned int gen, unsigned int ndesc) 2229{ 2230 unsigned int sgl_flits, flits; 2231 int i, idx, nsegs, wrlen; 2232 struct work_request_hdr *from; 2233 struct sg_ent *sgp, t3sgl[TX_MAX_SEGS / 2 + 1]; 2234 struct tx_desc *d = &q->desc[pidx]; 2235 struct txq_state txqs; 2236 struct sglist_seg *segs; 2237 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *); 2238 struct sglist *sgl; 2239 2240 from = (void *)(oh + 1); /* Start of WR within mbuf */ 2241 wrlen = m->m_len - sizeof(*oh); 2242 2243 if (!(oh->flags & F_HDR_SGL)) { 2244 write_imm(d, (caddr_t)from, wrlen, gen); 2245 2246 /* 2247 * mbuf with "real" immediate tx data will be enqueue_wr'd by 2248 * t3_push_frames and freed in wr_ack. Others, like those sent 2249 * down by close_conn, t3_send_reset, etc. should be freed here. 2250 */ 2251 if (!(oh->flags & F_HDR_DF)) 2252 m_free(m); 2253 return; 2254 } 2255 2256 memcpy(&d->flit[1], &from[1], wrlen - sizeof(*from)); 2257 2258 sgl = oh->sgl; 2259 flits = wrlen / 8; 2260 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : t3sgl; 2261 2262 nsegs = sgl->sg_nseg; 2263 segs = sgl->sg_segs; 2264 for (idx = 0, i = 0; i < nsegs; i++) { 2265 KASSERT(segs[i].ss_len, ("%s: 0 len in sgl", __func__)); 2266 if (i && idx == 0) 2267 ++sgp; 2268 sgp->len[idx] = htobe32(segs[i].ss_len); 2269 sgp->addr[idx] = htobe64(segs[i].ss_paddr); 2270 idx ^= 1; 2271 } 2272 if (idx) { 2273 sgp->len[idx] = 0; 2274 sgp->addr[idx] = 0; 2275 } 2276 2277 sgl_flits = sgl_len(nsegs); 2278 txqs.gen = gen; 2279 txqs.pidx = pidx; 2280 txqs.compl = 0; 2281 2282 write_wr_hdr_sgl(ndesc, d, &txqs, q, t3sgl, flits, sgl_flits, 2283 from->wrh_hi, from->wrh_lo); 2284} 2285 2286/** 2287 * ofld_xmit - send a packet through an offload queue 2288 * @adap: the adapter 2289 * @qs: the queue set containing the offload queue 2290 * @m: the packet 2291 * 2292 * Send an offload packet through an SGE offload queue. 2293 */ 2294static int 2295ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) 2296{ 2297 int ret; 2298 unsigned int ndesc; 2299 unsigned int pidx, gen; 2300 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2301 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *); 2302 2303 ndesc = G_HDR_NDESC(oh->flags); 2304 2305 TXQ_LOCK(qs); 2306again: reclaim_completed_tx(qs, 16, TXQ_OFLD); 2307 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD); 2308 if (__predict_false(ret)) { 2309 if (ret == 1) { 2310 TXQ_UNLOCK(qs); 2311 return (EINTR); 2312 } 2313 goto again; 2314 } 2315 2316 gen = q->gen; 2317 q->in_use += ndesc; 2318 pidx = q->pidx; 2319 q->pidx += ndesc; 2320 if (q->pidx >= q->size) { 2321 q->pidx -= q->size; 2322 q->gen ^= 1; 2323 } 2324 2325 write_ofld_wr(adap, m, q, pidx, gen, ndesc); 2326 check_ring_tx_db(adap, q, 1); 2327 TXQ_UNLOCK(qs); 2328 2329 return (0); 2330} 2331 2332/** 2333 * restart_offloadq - restart a suspended offload queue 2334 * @qs: the queue set containing the offload queue 2335 * 2336 * Resumes transmission on a suspended Tx offload queue.
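 *
 * This runs as a task on the adapter taskqueue (see the TASK_INIT calls
 * in t3_sge_alloc_qset()). The queue lock is dropped around each
 * write_ofld_wr() call and the doorbell is rung once, after the loop.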
2337 */ 2338static void 2339restart_offloadq(void *data, int npending) 2340{ 2341 struct mbuf *m; 2342 struct sge_qset *qs = data; 2343 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2344 adapter_t *adap = qs->port->adapter; 2345 int cleaned; 2346 2347 TXQ_LOCK(qs); 2348again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD); 2349 2350 while ((m = mbufq_first(&q->sendq)) != NULL) { 2351 unsigned int gen, pidx; 2352 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *); 2353 unsigned int ndesc = G_HDR_NDESC(oh->flags); 2354 2355 if (__predict_false(q->size - q->in_use < ndesc)) { 2356 setbit(&qs->txq_stopped, TXQ_OFLD); 2357 if (should_restart_tx(q) && 2358 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) 2359 goto again; 2360 q->stops++; 2361 break; 2362 } 2363 2364 gen = q->gen; 2365 q->in_use += ndesc; 2366 pidx = q->pidx; 2367 q->pidx += ndesc; 2368 if (q->pidx >= q->size) { 2369 q->pidx -= q->size; 2370 q->gen ^= 1; 2371 } 2372 2373 (void)mbufq_dequeue(&q->sendq); 2374 TXQ_UNLOCK(qs); 2375 write_ofld_wr(adap, m, q, pidx, gen, ndesc); 2376 TXQ_LOCK(qs); 2377 } 2378#if USE_GTS 2379 set_bit(TXQ_RUNNING, &q->flags); 2380 set_bit(TXQ_LAST_PKT_DB, &q->flags); 2381#endif 2382 TXQ_UNLOCK(qs); 2383 wmb(); 2384 t3_write_reg(adap, A_SG_KDOORBELL, 2385 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 2386} 2387 2388/** 2389 * t3_offload_tx - send an offload packet 2390 * @sc: the adapter 2391 * @m: the packet 2392 * 2393 * Sends an offload packet. The flags in the leading ofld_hdr select the 2394 * route: G_HDR_QSET picks the queue set, and F_HDR_CTRL sends the packet 2395 * through the control queue (with the ofld_hdr trimmed off) instead of the offload queue. */ 2396int 2397t3_offload_tx(struct adapter *sc, struct mbuf *m) 2398{ 2399 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *); 2400 struct sge_qset *qs = &sc->sge.qs[G_HDR_QSET(oh->flags)]; 2401 2402 if (oh->flags & F_HDR_CTRL) { 2403 m_adj(m, sizeof (*oh)); /* trim ofld_hdr off */ 2404 return (ctrl_xmit(sc, qs, m)); 2405 } else 2406 return (ofld_xmit(sc, qs, m)); 2407} 2408#endif 2409 2410static void 2411restart_tx(struct sge_qset *qs) 2412{ 2413 struct adapter *sc = qs->port->adapter; 2414 2415 if (isset(&qs->txq_stopped, TXQ_OFLD) && 2416 should_restart_tx(&qs->txq[TXQ_OFLD]) && 2417 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { 2418 qs->txq[TXQ_OFLD].restarts++; 2419 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2420 } 2421 2422 if (isset(&qs->txq_stopped, TXQ_CTRL) && 2423 should_restart_tx(&qs->txq[TXQ_CTRL]) && 2424 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { 2425 qs->txq[TXQ_CTRL].restarts++; 2426 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2427 } 2428} 2429 2430/** 2431 * t3_sge_alloc_qset - initialize an SGE queue set 2432 * @sc: the controller softc 2433 * @id: the queue set id 2434 * @nports: how many Ethernet ports will be using this queue set 2435 * @irq_vec_idx: the IRQ vector index for response queue interrupts 2436 * @p: configuration parameters for this queue set 2437 * @ntxq: number of Tx queues for the queue set 2438 * @pi: port info for queue set 2439 * 2440 * Allocate resources and initialize an SGE queue set. A queue set 2441 * comprises a response queue, two Rx free-buffer queues, and up to 3 2442 * Tx queues. The Tx queues are assigned roles in the order Ethernet 2443 * queue, offload queue, and control queue.
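 *
 * Returns 0 or an errno; on failure the partially constructed queue set
 * is torn down via t3_free_qset(). A minimal sketch of a call, with
 * illustrative local names (qset_idx, irq_idx, ntxq, pi stand in for
 * whatever the attach path has on hand):
 *
 *	ret = t3_sge_alloc_qset(sc, qset_idx, sc->params.nports,
 *	    irq_idx, &sc->params.sge.qset[qset_idx], ntxq, pi);
 *	if (ret != 0)
 *		device_printf(sc->dev, "qset alloc failed (%d)\n", ret);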
2444 */ 2445int 2446t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 2447 const struct qset_params *p, int ntxq, struct port_info *pi) 2448{ 2449 struct sge_qset *q = &sc->sge.qs[id]; 2450 int i, ret = 0; 2451 2452 MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF); 2453 q->port = pi; 2454 q->adap = sc; 2455 2456 if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size, 2457 M_DEVBUF, M_WAITOK, &q->lock)) == NULL) { 2458 device_printf(sc->dev, "failed to allocate mbuf ring\n"); 2459 ret = ENOMEM; goto err; 2460 } 2461 if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF, 2462 M_NOWAIT | M_ZERO)) == NULL) { 2463 device_printf(sc->dev, "failed to allocate ifq\n"); 2464 ret = ENOMEM; goto err; 2465 } 2466 ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp); 2467 callout_init(&q->txq[TXQ_ETH].txq_timer, 1); 2468 callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1); 2469 q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus; 2470 q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus; 2471 2472 init_qset_cntxt(q, id); 2473 q->idx = id; 2474 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 2475 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 2476 &q->fl[0].desc, &q->fl[0].sdesc, 2477 &q->fl[0].desc_tag, &q->fl[0].desc_map, 2478 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 2479 printf("error %d from alloc ring fl0\n", ret); 2480 goto err; 2481 } 2482 2483 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 2484 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 2485 &q->fl[1].desc, &q->fl[1].sdesc, 2486 &q->fl[1].desc_tag, &q->fl[1].desc_map, 2487 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 2488 printf("error %d from alloc ring fl1\n", ret); 2489 goto err; 2490 } 2491 2492 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 2493 &q->rspq.phys_addr, &q->rspq.desc, NULL, 2494 &q->rspq.desc_tag, &q->rspq.desc_map, 2495 NULL, NULL)) != 0) { 2496 printf("error %d from alloc ring rspq\n", ret); 2497 goto err; 2498 } 2499 2500 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d", 2501 device_get_unit(sc->dev), irq_vec_idx); 2502 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF); 2503 2504 for (i = 0; i < ntxq; ++i) { 2505 size_t sz = i == TXQ_CTRL ?
0 : sizeof(struct tx_sw_desc); 2506 2507 if ((ret = alloc_ring(sc, p->txq_size[i], 2508 sizeof(struct tx_desc), sz, 2509 &q->txq[i].phys_addr, &q->txq[i].desc, 2510 &q->txq[i].sdesc, &q->txq[i].desc_tag, 2511 &q->txq[i].desc_map, 2512 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 2513 printf("error %d from alloc ring tx %i\n", ret, i); 2514 goto err; 2515 } 2516 mbufq_init(&q->txq[i].sendq, INT_MAX); 2517 q->txq[i].gen = 1; 2518 q->txq[i].size = p->txq_size[i]; 2519 } 2520 2521#ifdef TCP_OFFLOAD 2522 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); 2523#endif 2524 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); 2525 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2526 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2527 2528 q->fl[0].gen = q->fl[1].gen = 1; 2529 q->fl[0].size = p->fl_size; 2530 q->fl[1].size = p->jumbo_size; 2531 2532 q->rspq.gen = 1; 2533 q->rspq.cidx = 0; 2534 q->rspq.size = p->rspq_size; 2535 2536 q->txq[TXQ_ETH].stop_thres = nports * 2537 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 2538 2539 q->fl[0].buf_size = MCLBYTES; 2540 q->fl[0].zone = zone_pack; 2541 q->fl[0].type = EXT_PACKET; 2542 2543 if (p->jumbo_buf_size == MJUM16BYTES) { 2544 q->fl[1].zone = zone_jumbo16; 2545 q->fl[1].type = EXT_JUMBO16; 2546 } else if (p->jumbo_buf_size == MJUM9BYTES) { 2547 q->fl[1].zone = zone_jumbo9; 2548 q->fl[1].type = EXT_JUMBO9; 2549 } else if (p->jumbo_buf_size == MJUMPAGESIZE) { 2550 q->fl[1].zone = zone_jumbop; 2551 q->fl[1].type = EXT_JUMBOP; 2552 } else { 2553 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size)); 2554 ret = EDOOFUS; 2555 goto err; 2556 } 2557 q->fl[1].buf_size = p->jumbo_buf_size; 2558 2559 /* Allocate and setup the lro_ctrl structure */ 2560 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO); 2561#if defined(INET6) || defined(INET) 2562 ret = tcp_lro_init(&q->lro.ctrl); 2563 if (ret) { 2564 printf("error %d from tcp_lro_init\n", ret); 2565 goto err; 2566 } 2567#endif 2568 q->lro.ctrl.ifp = pi->ifp; 2569 2570 mtx_lock_spin(&sc->sge.reg_lock); 2571 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 2572 q->rspq.phys_addr, q->rspq.size, 2573 q->fl[0].buf_size, 1, 0); 2574 if (ret) { 2575 printf("error %d from t3_sge_init_rspcntxt\n", ret); 2576 goto err_unlock; 2577 } 2578 2579 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2580 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 2581 q->fl[i].phys_addr, q->fl[i].size, 2582 q->fl[i].buf_size, p->cong_thres, 1, 2583 0); 2584 if (ret) { 2585 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 2586 goto err_unlock; 2587 } 2588 } 2589 2590 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 2591 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 2592 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 2593 1, 0); 2594 if (ret) { 2595 printf("error %d from t3_sge_init_ecntxt\n", ret); 2596 goto err_unlock; 2597 } 2598 2599 if (ntxq > 1) { 2600 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 2601 USE_GTS, SGE_CNTXT_OFLD, id, 2602 q->txq[TXQ_OFLD].phys_addr, 2603 q->txq[TXQ_OFLD].size, 0, 1, 0); 2604 if (ret) { 2605 printf("error %d from t3_sge_init_ecntxt\n", ret); 2606 goto err_unlock; 2607 } 2608 } 2609 2610 if (ntxq > 2) { 2611 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 2612 SGE_CNTXT_CTRL, id, 2613 q->txq[TXQ_CTRL].phys_addr, 2614 q->txq[TXQ_CTRL].size, 2615 q->txq[TXQ_CTRL].token, 1, 0); 2616 if (ret) { 2617 printf("error %d from t3_sge_init_ecntxt\n", 
ret); 2618 goto err_unlock; 2619 } 2620 } 2621 2622 mtx_unlock_spin(&sc->sge.reg_lock); 2623 t3_update_qset_coalesce(q, p); 2624 2625 refill_fl(sc, &q->fl[0], q->fl[0].size); 2626 refill_fl(sc, &q->fl[1], q->fl[1].size); 2627 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 2628 2629 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 2630 V_NEWTIMER(q->rspq.holdoff_tmr)); 2631 2632 return (0); 2633 2634err_unlock: 2635 mtx_unlock_spin(&sc->sge.reg_lock); 2636err: 2637 TXQ_LOCK(q); 2638 t3_free_qset(sc, q); 2639 2640 return (ret); 2641} 2642 2643/* 2644 * Remove the CPL_RX_PKT header from the mbuf and reduce it to a regular mbuf 2645 * with ethernet data. Hardware checksum assistance and any VLAN tag are 2646 * also recorded in the mbuf packet header here. 2647 */ 2648void 2649t3_rx_eth(struct adapter *adap, struct mbuf *m, int ethpad) 2650{ 2651 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); 2652 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]]; 2653 struct ifnet *ifp = pi->ifp; 2654 2655 if (cpl->vlan_valid) { 2656 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 2657 m->m_flags |= M_VLANTAG; 2658 } 2659 2660 m->m_pkthdr.rcvif = ifp; 2661 /* 2662 * adjust after conversion to mbuf chain 2663 */ 2664 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad); 2665 m->m_len -= (sizeof(*cpl) + ethpad); 2666 m->m_data += (sizeof(*cpl) + ethpad); 2667 2668 if (!cpl->fragment && cpl->csum_valid && cpl->csum == 0xffff) { 2669 struct ether_header *eh = mtod(m, void *); 2670 uint16_t eh_type; 2671 2672 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2673 struct ether_vlan_header *evh = mtod(m, void *); 2674 2675 eh_type = evh->evl_proto; 2676 } else 2677 eh_type = eh->ether_type; 2678 2679 if (ifp->if_capenable & IFCAP_RXCSUM && 2680 eh_type == htons(ETHERTYPE_IP)) { 2681 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | 2682 CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 2683 m->m_pkthdr.csum_data = 0xffff; 2684 } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 && 2685 eh_type == htons(ETHERTYPE_IPV6)) { 2686 m->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 | 2687 CSUM_PSEUDO_HDR); 2688 m->m_pkthdr.csum_data = 0xffff; 2689 } 2690 } 2691} 2692 2693/** 2694 * get_packet - return the next ingress packet buffer from a free list 2695 * @adap: the adapter that received the packet 2696 * @drop_thres: # of remaining buffers before we start dropping packets 2697 * @qs: the qset that the SGE free list holding the packet belongs to 2698 * @mh: the mbuf header; contains pointers to the head and tail of the mbuf chain 2699 * @r: response descriptor 2700 * 2701 * Get the next packet from a free list and complete setup of the 2702 * mbuf. If the packet is small we make a copy and recycle the 2703 * original buffer, otherwise we use the original buffer itself. If a 2704 * positive drop threshold is supplied packets are dropped and their 2705 * buffers recycled if (a) the number of remaining buffers is under the 2706 * threshold and the packet is too big to copy, or (b) the packet should 2707 * be copied but there is no memory for the copy. 2708 */ 2709static int 2710get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 2711 struct t3_mbuf_hdr *mh, struct rsp_desc *r) 2712{ 2713 2714 unsigned int len_cq = ntohl(r->len_cq); 2715 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ?
&qs->fl[1] : &qs->fl[0]; 2716 int mask, cidx = fl->cidx; 2717 struct rx_sw_desc *sd = &fl->sdesc[cidx]; 2718 uint32_t len = G_RSPD_LEN(len_cq); 2719 uint32_t flags = M_EXT; 2720 uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags)); 2721 caddr_t cl; 2722 struct mbuf *m; 2723 int ret = 0; 2724 2725 mask = fl->size - 1; 2726 prefetch(fl->sdesc[(cidx + 1) & mask].m); 2727 prefetch(fl->sdesc[(cidx + 2) & mask].m); 2728 prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl); 2729 prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); 2730 2731 fl->credits--; 2732 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2733 2734 if (recycle_enable && len <= SGE_RX_COPY_THRES && 2735 sopeop == RSPQ_SOP_EOP) { 2736 if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) 2737 goto skip_recycle; 2738 cl = mtod(m, void *); 2739 memcpy(cl, sd->rxsd_cl, len); 2740 recycle_rx_buf(adap, fl, fl->cidx); 2741 m->m_pkthdr.len = m->m_len = len; 2742 m->m_flags = 0; 2743 mh->mh_head = mh->mh_tail = m; 2744 ret = 1; 2745 goto done; 2746 } else { 2747 skip_recycle: 2748 bus_dmamap_unload(fl->entry_tag, sd->map); 2749 cl = sd->rxsd_cl; 2750 m = sd->m; 2751 2752 if ((sopeop == RSPQ_SOP_EOP) || 2753 (sopeop == RSPQ_SOP)) 2754 flags |= M_PKTHDR; 2755 m_init(m, M_NOWAIT, MT_DATA, flags); 2756 if (fl->zone == zone_pack) { 2757 /* 2758 * restore clobbered data pointer 2759 */ 2760 m->m_data = m->m_ext.ext_buf; 2761 } else { 2762 m_cljset(m, cl, fl->type); 2763 } 2764 m->m_len = len; 2765 } 2766 switch(sopeop) { 2767 case RSPQ_SOP_EOP: 2768 ret = 1; 2769 /* FALLTHROUGH */ 2770 case RSPQ_SOP: 2771 mh->mh_head = mh->mh_tail = m; 2772 m->m_pkthdr.len = len; 2773 break; 2774 case RSPQ_EOP: 2775 ret = 1; 2776 /* FALLTHROUGH */ 2777 case RSPQ_NSOP_NEOP: 2778 if (mh->mh_tail == NULL) { 2779 log(LOG_ERR, "discarding intermediate descriptor entry\n"); 2780 m_freem(m); 2781 m = NULL; 2782 break; 2783 } 2784 mh->mh_tail->m_next = m; 2785 mh->mh_tail = m; 2786 mh->mh_head->m_pkthdr.len += len; 2787 break; 2788 } 2789 if (cxgb_debug && m != NULL) 2790 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len); 2791done: 2792 if (++fl->cidx == fl->size) 2793 fl->cidx = 0; 2794 2795 return (ret); 2796} 2797 2798/** 2799 * handle_rsp_cntrl_info - handles control information in a response 2800 * @qs: the queue set corresponding to the response 2801 * @flags: the response control flags 2802 * 2803 * Handles the control information of an SGE response, such as GTS 2804 * indications and completion credits for the queue set's Tx queues. 2805 * HW coalesces credits, we don't do any extra SW coalescing. 
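 *
 * For reference, the mapping used below: TXQ0 credits are applied to the
 * Ethernet queue, TXQ1 credits to the offload queue, and TXQ2 credits to
 * the control queue of this set.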
2806 */ 2807static __inline void 2808handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 2809{ 2810 unsigned int credits; 2811 2812#if USE_GTS 2813 if (flags & F_RSPD_TXQ0_GTS) 2814 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 2815#endif 2816 credits = G_RSPD_TXQ0_CR(flags); 2817 if (credits) 2818 qs->txq[TXQ_ETH].processed += credits; 2819 2820 credits = G_RSPD_TXQ2_CR(flags); 2821 if (credits) 2822 qs->txq[TXQ_CTRL].processed += credits; 2823 2824# if USE_GTS 2825 if (flags & F_RSPD_TXQ1_GTS) 2826 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 2827# endif 2828 credits = G_RSPD_TXQ1_CR(flags); 2829 if (credits) 2830 qs->txq[TXQ_OFLD].processed += credits; 2831 2832} 2833 2834static void 2835check_ring_db(adapter_t *adap, struct sge_qset *qs, 2836 unsigned int sleeping) 2837{ 2838 ; 2839} 2840 2841/** 2842 * process_responses - process responses from an SGE response queue 2843 * @adap: the adapter 2844 * @qs: the queue set to which the response queue belongs 2845 * @budget: how many responses can be processed in this round 2846 * 2847 * Process responses from an SGE response queue up to the supplied budget. 2848 * Responses include received packets as well as credits and other events 2849 * for the queues that belong to the response queue's queue set. 2850 * A negative budget is effectively unlimited. 2851 * 2852 * Additionally choose the interrupt holdoff time for the next interrupt 2853 * on this queue. If the system is under memory shortage use a fairly 2854 * long delay to help recovery. 2855 */ 2856static int 2857process_responses(adapter_t *adap, struct sge_qset *qs, int budget) 2858{ 2859 struct sge_rspq *rspq = &qs->rspq; 2860 struct rsp_desc *r = &rspq->desc[rspq->cidx]; 2861 int budget_left = budget; 2862 unsigned int sleeping = 0; 2863#if defined(INET6) || defined(INET) 2864 int lro_enabled = qs->lro.enabled; 2865 int skip_lro; 2866 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl; 2867#endif 2868 struct t3_mbuf_hdr *mh = &rspq->rspq_mh; 2869#ifdef DEBUG 2870 static int last_holdoff = 0; 2871 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) { 2872 printf("next_holdoff=%d\n", rspq->holdoff_tmr); 2873 last_holdoff = rspq->holdoff_tmr; 2874 } 2875#endif 2876 rspq->next_holdoff = rspq->holdoff_tmr; 2877 2878 while (__predict_true(budget_left && is_new_response(r, rspq))) { 2879 int eth, eop = 0, ethpad = 0; 2880 uint32_t flags = ntohl(r->flags); 2881 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val); 2882 uint8_t opcode = r->rss_hdr.opcode; 2883 2884 eth = (opcode == CPL_RX_PKT); 2885 2886 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { 2887 struct mbuf *m; 2888 2889 if (cxgb_debug) 2890 printf("async notification\n"); 2891 2892 if (mh->mh_head == NULL) { 2893 mh->mh_head = m_gethdr(M_NOWAIT, MT_DATA); 2894 m = mh->mh_head; 2895 } else { 2896 m = m_gethdr(M_NOWAIT, MT_DATA); 2897 } 2898 if (m == NULL) 2899 goto no_mem; 2900 2901 memcpy(mtod(m, char *), r, AN_PKT_SIZE); 2902 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE; 2903 *mtod(m, uint8_t *) = CPL_ASYNC_NOTIF; 2904 opcode = CPL_ASYNC_NOTIF; 2905 eop = 1; 2906 rspq->async_notif++; 2907 goto skip; 2908 } else if (flags & F_RSPD_IMM_DATA_VALID) { 2909 struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA); 2910 2911 if (m == NULL) { 2912 no_mem: 2913 rspq->next_holdoff = NOMEM_INTR_DELAY; 2914 budget_left--; 2915 break; 2916 } 2917 if (mh->mh_head == NULL) 2918 mh->mh_head = m; 2919 else 2920 mh->mh_tail->m_next = m; 2921 mh->mh_tail = m; 2922 2923 get_imm_packet(adap, r, m); 2924 mh->mh_head->m_pkthdr.len += m->m_len; 2925 eop = 
1; 2926 rspq->imm_data++; 2927 } else if (r->len_cq) { 2928 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0; 2929 2930 eop = get_packet(adap, drop_thresh, qs, mh, r); 2931 if (eop) { 2932 if (r->rss_hdr.hash_type && !adap->timestamp) { 2933 M_HASHTYPE_SET(mh->mh_head, 2934 M_HASHTYPE_OPAQUE_HASH); 2935 mh->mh_head->m_pkthdr.flowid = rss_hash; 2936 } 2937 } 2938 2939 ethpad = 2; 2940 } else { 2941 rspq->pure_rsps++; 2942 } 2943 skip: 2944 if (flags & RSPD_CTRL_MASK) { 2945 sleeping |= flags & RSPD_GTS_MASK; 2946 handle_rsp_cntrl_info(qs, flags); 2947 } 2948 2949 if (!eth && eop) { 2950 rspq->offload_pkts++; 2951#ifdef TCP_OFFLOAD 2952 adap->cpl_handler[opcode](qs, r, mh->mh_head); 2953#else 2954 m_freem(mh->mh_head); 2955#endif 2956 mh->mh_head = NULL; 2957 } else if (eth && eop) { 2958 struct mbuf *m = mh->mh_head; 2959 2960 t3_rx_eth(adap, m, ethpad); 2961 2962 /* 2963 * The T304 sends incoming packets on any qset. If LRO 2964 * is also enabled, we could end up sending the packet 2965 * up lro_ctrl->ifp's input path. That is incorrect. 2966 * 2967 * The mbuf's rcvif was derived from the cpl header and 2968 * is accurate. Skip LRO and just use that. 2969 */ 2970#if defined(INET6) || defined(INET) 2971 skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif); 2972 2973 if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro 2974 && (tcp_lro_rx(lro_ctrl, m, 0) == 0) 2975 ) { 2976 /* successfully queued for LRO */ 2977 } else 2978#endif 2979 { 2980 /* 2981 * LRO not enabled, packet unsuitable for LRO, 2982 * or unable to queue. Pass it up right now in 2983 * either case. 2984 */ 2985 struct ifnet *ifp = m->m_pkthdr.rcvif; 2986 (*ifp->if_input)(ifp, m); 2987 } 2988 mh->mh_head = NULL; 2989 2990 } 2991 2992 r++; 2993 if (__predict_false(++rspq->cidx == rspq->size)) { 2994 rspq->cidx = 0; 2995 rspq->gen ^= 1; 2996 r = rspq->desc; 2997 } 2998 2999 if (++rspq->credits >= 64) { 3000 refill_rspq(adap, rspq, rspq->credits); 3001 rspq->credits = 0; 3002 } 3003 __refill_fl_lt(adap, &qs->fl[0], 32); 3004 __refill_fl_lt(adap, &qs->fl[1], 32); 3005 --budget_left; 3006 } 3007 3008#if defined(INET6) || defined(INET) 3009 /* Flush LRO */ 3010 tcp_lro_flush_all(lro_ctrl); 3011#endif 3012 3013 if (sleeping) 3014 check_ring_db(adap, qs, sleeping); 3015 3016 mb(); /* commit Tx queue processed updates */ 3017 if (__predict_false(qs->txq_stopped > 1)) /* OFLD and/or CTRL stopped */ 3018 restart_tx(qs); 3019 3020 __refill_fl_lt(adap, &qs->fl[0], 512); 3021 __refill_fl_lt(adap, &qs->fl[1], 512); 3022 budget -= budget_left; 3023 return (budget); 3024} 3025 3026/* 3027 * A helper function that processes responses and issues GTS. 3028 */ 3029static __inline int 3030process_responses_gts(adapter_t *adap, struct sge_rspq *rq) 3031{ 3032 int work; 3033 static int last_holdoff = 0; 3034 3035 work = process_responses(adap, rspq_to_qset(rq), -1); 3036 3037 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { 3038 printf("next_holdoff=%d\n", rq->next_holdoff); 3039 last_holdoff = rq->next_holdoff; 3040 } 3041 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | 3042 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); 3043 3044 return (work); 3045} 3046 3047#ifdef NETDUMP 3048int 3049cxgb_netdump_poll_rx(adapter_t *adap, struct sge_qset *qs) 3050{ 3051 3052 return (process_responses_gts(adap, &qs->rspq)); 3053} 3054#endif 3055 3056/* 3057 * Interrupt handler for legacy INTx interrupts for T3B-based cards. 3058 * Handles data events from SGE response queues as well as error and other 3059 * async events as they all use the same interrupt pin.
We use one SGE 3060 * response queue per port in this mode and protect all response queues with 3061 * queue 0's lock. 3062 */ 3063void 3064t3b_intr(void *data) 3065{ 3066 uint32_t i, map; 3067 adapter_t *adap = data; 3068 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3069 3070 t3_write_reg(adap, A_PL_CLI, 0); 3071 map = t3_read_reg(adap, A_SG_DATA_INTR); 3072 3073 if (!map) 3074 return; 3075 3076 if (__predict_false(map & F_ERRINTR)) { 3077 t3_write_reg(adap, A_PL_INT_ENABLE0, 0); 3078 (void) t3_read_reg(adap, A_PL_INT_ENABLE0); 3079 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3080 } 3081 3082 mtx_lock(&q0->lock); 3083 for_each_port(adap, i) 3084 if (map & (1 << i)) 3085 process_responses_gts(adap, &adap->sge.qs[i].rspq); 3086 mtx_unlock(&q0->lock); 3087} 3088 3089/* 3090 * The MSI interrupt handler. This needs to handle data events from SGE 3091 * response queues as well as error and other async events as they all use 3092 * the same MSI vector. We use one SGE response queue per port in this mode 3093 * and protect all response queues with queue 0's lock. 3094 */ 3095void 3096t3_intr_msi(void *data) 3097{ 3098 adapter_t *adap = data; 3099 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3100 int i, new_packets = 0; 3101 3102 mtx_lock(&q0->lock); 3103 3104 for_each_port(adap, i) 3105 if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 3106 new_packets = 1; 3107 mtx_unlock(&q0->lock); 3108 if (new_packets == 0) { 3109 t3_write_reg(adap, A_PL_INT_ENABLE0, 0); 3110 (void) t3_read_reg(adap, A_PL_INT_ENABLE0); 3111 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3112 } 3113} 3114 3115void 3116t3_intr_msix(void *data) 3117{ 3118 struct sge_qset *qs = data; 3119 adapter_t *adap = qs->port->adapter; 3120 struct sge_rspq *rspq = &qs->rspq; 3121 3122 if (process_responses_gts(adap, rspq) == 0) 3123 rspq->unhandled_irqs++; 3124} 3125 3126#define QDUMP_SBUF_SIZE 32 * 400 3127static int 3128t3_dump_rspq(SYSCTL_HANDLER_ARGS) 3129{ 3130 struct sge_rspq *rspq; 3131 struct sge_qset *qs; 3132 int i, err, dump_end, idx; 3133 struct sbuf *sb; 3134 struct rsp_desc *rspd; 3135 uint32_t data[4]; 3136 3137 rspq = arg1; 3138 qs = rspq_to_qset(rspq); 3139 if (rspq->rspq_dump_count == 0) 3140 return (0); 3141 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) { 3142 log(LOG_WARNING, 3143 "dump count is too large %d\n", rspq->rspq_dump_count); 3144 rspq->rspq_dump_count = 0; 3145 return (EINVAL); 3146 } 3147 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) { 3148 log(LOG_WARNING, 3149 "dump start of %d is greater than queue size\n", 3150 rspq->rspq_dump_start); 3151 rspq->rspq_dump_start = 0; 3152 return (EINVAL); 3153 } 3154 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data); 3155 if (err) 3156 return (err); 3157 err = sysctl_wire_old_buffer(req, 0); 3158 if (err) 3159 return (err); 3160 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); 3161 3162 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n", 3163 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f), 3164 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1)); 3165 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n", 3166 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]); 3167 3168 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start, 3169 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1)); 3170 3171 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count; 3172 for (i = rspq->rspq_dump_start; i < dump_end; i++) { 3173 idx = i & (RSPQ_Q_SIZE-1); 3174 3175 rspd = 
&rspq->desc[idx]; 3176 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n", 3177 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx, 3178 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx)); 3179 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n", 3180 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags), 3181 be32toh(rspd->len_cq), rspd->intr_gen); 3182 } 3183 3184 err = sbuf_finish(sb); 3185 sbuf_delete(sb); 3186 return (err); 3187} 3188 3189static int 3190t3_dump_txq_eth(SYSCTL_HANDLER_ARGS) 3191{ 3192 struct sge_txq *txq; 3193 struct sge_qset *qs; 3194 int i, j, err, dump_end; 3195 struct sbuf *sb; 3196 struct tx_desc *txd; 3197 uint32_t *WR, wr_hi, wr_lo, gen; 3198 uint32_t data[4]; 3199 3200 txq = arg1; 3201 qs = txq_to_qset(txq, TXQ_ETH); 3202 if (txq->txq_dump_count == 0) { 3203 return (0); 3204 } 3205 if (txq->txq_dump_count > TX_ETH_Q_SIZE) { 3206 log(LOG_WARNING, 3207 "dump count is too large %d\n", txq->txq_dump_count); 3208 txq->txq_dump_count = 1; 3209 return (EINVAL); 3210 } 3211 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) { 3212 log(LOG_WARNING, 3213 "dump start of %d is greater than queue size\n", 3214 txq->txq_dump_start); 3215 txq->txq_dump_start = 0; 3216 return (EINVAL); 3217 } 3218 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data); 3219 if (err) 3220 return (err); 3221 err = sysctl_wire_old_buffer(req, 0); 3222 if (err) 3223 return (err); 3224 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); 3225 3226 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n", 3227 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 3228 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1)); 3229 sbuf_printf(sb, " TUN=%u TOE=%u generation=%u uP token=%u valid=%u\n", 3230 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1), 3231 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1)); 3232 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3233 txq->txq_dump_start, 3234 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1)); 3235 3236 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3237 for (i = txq->txq_dump_start; i < dump_end; i++) { 3238 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)]; 3239 WR = (uint32_t *)txd->flit; 3240 wr_hi = ntohl(WR[0]); 3241 wr_lo = ntohl(WR[1]); 3242 gen = G_WR_GEN(wr_lo); 3243 3244 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3245 wr_hi, wr_lo, gen); 3246 for (j = 2; j < 30; j += 4) 3247 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3248 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3249 3250 } 3251 err = sbuf_finish(sb); 3252 sbuf_delete(sb); 3253 return (err); 3254} 3255 3256static int 3257t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS) 3258{ 3259 struct sge_txq *txq; 3260 struct sge_qset *qs; 3261 int i, j, err, dump_end; 3262 struct sbuf *sb; 3263 struct tx_desc *txd; 3264 uint32_t *WR, wr_hi, wr_lo, gen; 3265 3266 txq = arg1; 3267 qs = txq_to_qset(txq, TXQ_CTRL); 3268 if (txq->txq_dump_count == 0) { 3269 return (0); 3270 } 3271 if (txq->txq_dump_count > 256) { 3272 log(LOG_WARNING, 3273 "dump count is too large %d\n", txq->txq_dump_count); 3274 txq->txq_dump_count = 1; 3275 return (EINVAL); 3276 } 3277 if (txq->txq_dump_start > 255) { 3278 log(LOG_WARNING, 3279 "dump start of %d is greater than queue size\n", 3280 txq->txq_dump_start); 3281 txq->txq_dump_start = 0; 3282 return (EINVAL); 3283 } 3284 3285 err = sysctl_wire_old_buffer(req, 0); 3286 if (err != 0) 3287 return (err); 3288 sb = sbuf_new_for_sysctl(NULL, NULL,
QDUMP_SBUF_SIZE, req); 3289 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3290 txq->txq_dump_start, 3291 (txq->txq_dump_start + txq->txq_dump_count) & 255); 3292 3293 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3294 for (i = txq->txq_dump_start; i < dump_end; i++) { 3295 txd = &txq->desc[i & (255)]; 3296 WR = (uint32_t *)txd->flit; 3297 wr_hi = ntohl(WR[0]); 3298 wr_lo = ntohl(WR[1]); 3299 gen = G_WR_GEN(wr_lo); 3300 3301 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3302 wr_hi, wr_lo, gen); 3303 for (j = 2; j < 30; j += 4) 3304 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3305 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3306 3307 } 3308 err = sbuf_finish(sb); 3309 sbuf_delete(sb); 3310 return (err); 3311} 3312 3313static int 3314t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS) 3315{ 3316 adapter_t *sc = arg1; 3317 struct qset_params *qsp = &sc->params.sge.qset[0]; 3318 int coalesce_usecs; 3319 struct sge_qset *qs; 3320 int i, j, err, nqsets = 0; 3321 struct mtx *lock; 3322 3323 if ((sc->flags & FULL_INIT_DONE) == 0) 3324 return (ENXIO); 3325 3326 coalesce_usecs = qsp->coalesce_usecs; 3327 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req); 3328 3329 if (err != 0) { 3330 return (err); 3331 } 3332 if (coalesce_usecs == qsp->coalesce_usecs) 3333 return (0); 3334 3335 for (i = 0; i < sc->params.nports; i++) 3336 for (j = 0; j < sc->port[i].nqsets; j++) 3337 nqsets++; 3338 3339 coalesce_usecs = max(1, coalesce_usecs); 3340 3341 for (i = 0; i < nqsets; i++) { 3342 qs = &sc->sge.qs[i]; 3343 qsp = &sc->params.sge.qset[i]; 3344 qsp->coalesce_usecs = coalesce_usecs; 3345 3346 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 3347 &sc->sge.qs[0].rspq.lock; 3348 3349 mtx_lock(lock); 3350 t3_update_qset_coalesce(qs, qsp); 3351 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | 3352 V_NEWTIMER(qs->rspq.holdoff_tmr)); 3353 mtx_unlock(lock); 3354 } 3355 3356 return (0); 3357} 3358 3359static int 3360t3_pkt_timestamp(SYSCTL_HANDLER_ARGS) 3361{ 3362 adapter_t *sc = arg1; 3363 int rc, timestamp; 3364 3365 if ((sc->flags & FULL_INIT_DONE) == 0) 3366 return (ENXIO); 3367 3368 timestamp = sc->timestamp; 3369 rc = sysctl_handle_int(oidp, ×tamp, arg2, req); 3370 3371 if (rc != 0) 3372 return (rc); 3373 3374 if (timestamp != sc->timestamp) { 3375 t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS, 3376 timestamp ? 
F_ENABLERXPKTTMSTPRSS : 0); 3377 sc->timestamp = timestamp; 3378 } 3379 3380 return (0); 3381} 3382 3383void 3384t3_add_attach_sysctls(adapter_t *sc) 3385{ 3386 struct sysctl_ctx_list *ctx; 3387 struct sysctl_oid_list *children; 3388 3389 ctx = device_get_sysctl_ctx(sc->dev); 3390 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3391 3392 /* random information */ 3393 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3394 "firmware_version", 3395 CTLFLAG_RD, sc->fw_version, 3396 0, "firmware version"); 3397 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, 3398 "hw_revision", 3399 CTLFLAG_RD, &sc->params.rev, 3400 0, "chip model"); 3401 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3402 "port_types", 3403 CTLFLAG_RD, sc->port_types, 3404 0, "type of ports"); 3405 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3406 "enable_debug", 3407 CTLFLAG_RW, &cxgb_debug, 3408 0, "enable verbose debugging output"); 3409 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tunq_coalesce", 3410 CTLFLAG_RD, &sc->tunq_coalesce, 3411 "#tunneled packets freed"); 3412 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3413 "txq_overrun", 3414 CTLFLAG_RD, &txq_fills, 3415 0, "#times txq overrun"); 3416 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, 3417 "core_clock", 3418 CTLFLAG_RD, &sc->params.vpd.cclk, 3419 0, "core clock frequency (in KHz)"); 3420} 3421 3422 3423static const char *rspq_name = "rspq"; 3424static const char *txq_names[] = 3425{ 3426 "txq_eth", 3427 "txq_ofld", 3428 "txq_ctrl" 3429}; 3430 3431static int 3432sysctl_handle_macstat(SYSCTL_HANDLER_ARGS) 3433{ 3434 struct port_info *p = arg1; 3435 uint64_t *parg; 3436 3437 if (!p) 3438 return (EINVAL); 3439 3440 cxgb_refresh_stats(p); 3441 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2); 3442 3443 return (sysctl_handle_64(oidp, parg, 0, req)); 3444} 3445 3446void 3447t3_add_configured_sysctls(adapter_t *sc) 3448{ 3449 struct sysctl_ctx_list *ctx; 3450 struct sysctl_oid_list *children; 3451 int i, j; 3452 3453 ctx = device_get_sysctl_ctx(sc->dev); 3454 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3455 3456 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3457 "intr_coal", 3458 CTLTYPE_INT|CTLFLAG_RW, sc, 3459 0, t3_set_coalesce_usecs, 3460 "I", "interrupt coalescing timer (us)"); 3461 3462 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3463 "pkt_timestamp", 3464 CTLTYPE_INT | CTLFLAG_RW, sc, 3465 0, t3_pkt_timestamp, 3466 "I", "provide packet timestamp instead of connection hash"); 3467 3468 for (i = 0; i < sc->params.nports; i++) { 3469 struct port_info *pi = &sc->port[i]; 3470 struct sysctl_oid *poid; 3471 struct sysctl_oid_list *poidlist; 3472 struct mac_stats *mstats = &pi->mac.stats; 3473 3474 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i); 3475 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 3476 pi->namebuf, CTLFLAG_RD, NULL, "port statistics"); 3477 poidlist = SYSCTL_CHILDREN(poid); 3478 SYSCTL_ADD_UINT(ctx, poidlist, OID_AUTO, 3479 "nqsets", CTLFLAG_RD, &pi->nqsets, 3480 0, "#queue sets"); 3481 3482 for (j = 0; j < pi->nqsets; j++) { 3483 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j]; 3484 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, 3485 *ctrlqpoid, *lropoid; 3486 struct sysctl_oid_list *qspoidlist, *rspqpoidlist, 3487 *txqpoidlist, *ctrlqpoidlist, 3488 *lropoidlist; 3489 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 3490 3491 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j); 3492 3493 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 3494 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics"); 3495 qspoidlist = SYSCTL_CHILDREN(qspoid); 3496 3497 SYSCTL_ADD_UINT(ctx, 
qspoidlist, OID_AUTO, "fl0_empty", 3498 CTLFLAG_RD, &qs->fl[0].empty, 0, 3499 "freelist #0 empty"); 3500 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty", 3501 CTLFLAG_RD, &qs->fl[1].empty, 0, 3502 "freelist #1 empty"); 3503 3504 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3505 rspq_name, CTLFLAG_RD, NULL, "rspq statistics"); 3506 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid); 3507 3508 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3509 txq_names[0], CTLFLAG_RD, NULL, "txq statistics"); 3510 txqpoidlist = SYSCTL_CHILDREN(txqpoid); 3511 3512 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3513 txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics"); 3514 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid); 3515 3516 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3517 "lro_stats", CTLFLAG_RD, NULL, "LRO statistics"); 3518 lropoidlist = SYSCTL_CHILDREN(lropoid); 3519 3520 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size", 3521 CTLFLAG_RD, &qs->rspq.size, 3522 0, "#entries in response queue"); 3523 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx", 3524 CTLFLAG_RD, &qs->rspq.cidx, 3525 0, "consumer index"); 3526 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits", 3527 CTLFLAG_RD, &qs->rspq.credits, 3528 0, "#credits"); 3529 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved", 3530 CTLFLAG_RD, &qs->rspq.starved, 3531 0, "#times starved"); 3532 SYSCTL_ADD_UAUTO(ctx, rspqpoidlist, OID_AUTO, "phys_addr", 3533 CTLFLAG_RD, &qs->rspq.phys_addr, 3534 "physical address of the queue"); 3535 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start", 3536 CTLFLAG_RW, &qs->rspq.rspq_dump_start, 3537 0, "start rspq dump entry"); 3538 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count", 3539 CTLFLAG_RW, &qs->rspq.rspq_dump_count, 3540 0, "#rspq entries to dump"); 3541 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump", 3542 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq, 3543 0, t3_dump_rspq, "A", "dump of the response queue"); 3544 3545 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "dropped", 3546 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops, 3547 "#tunneled packets dropped"); 3548 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "sendqlen", 3549 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.mq_len, 3550 0, "#tunneled packets waiting to be sent"); 3551#if 0 3552 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx", 3553 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod, 3554 0, "#tunneled packets queue producer index"); 3555 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx", 3556 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons, 3557 0, "#tunneled packets queue consumer index"); 3558#endif 3559 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "processed", 3560 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed, 3561 0, "#tunneled packets processed by the card"); 3562 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned", 3563 CTLFLAG_RD, &txq->cleaned, 3564 0, "#tunneled packets cleaned"); 3565 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use", 3566 CTLFLAG_RD, &txq->in_use, 3567 0, "#tunneled packet slots in use"); 3568 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "frees", 3569 CTLFLAG_RD, &txq->txq_frees, 3570 "#tunneled packets freed"); 3571 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped", 3572 CTLFLAG_RD, &txq->txq_skipped, 3573 0, "#tunneled packet descriptors skipped"); 3574 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "coalesced", 3575 CTLFLAG_RD, &txq->txq_coalesced, 3576 "#tunneled packets coalesced"); 3577 SYSCTL_ADD_UINT(ctx, txqpoidlist,
OID_AUTO, "enqueued", 3578 CTLFLAG_RD, &txq->txq_enqueued, 3579 0, "#tunneled packets enqueued to hardware"); 3580 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags", 3581 CTLFLAG_RD, &qs->txq_stopped, 3582 0, "bitmask of stopped Tx queues"); 3583 SYSCTL_ADD_UAUTO(ctx, txqpoidlist, OID_AUTO, "phys_addr", 3584 CTLFLAG_RD, &txq->phys_addr, 3585 "physical address of the queue"); 3586 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen", 3587 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen, 3588 0, "txq generation"); 3589 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx", 3590 CTLFLAG_RD, &txq->cidx, 3591 0, "hardware queue cidx"); 3592 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx", 3593 CTLFLAG_RD, &txq->pidx, 3594 0, "hardware queue pidx"); 3595 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start", 3596 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start, 3597 0, "txq start idx for dump"); 3598 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count", 3599 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count, 3600 0, "txq #entries to dump"); 3601 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump", 3602 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH], 3603 0, t3_dump_txq_eth, "A", "dump of the transmit queue"); 3604 3605 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start", 3606 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start, 3607 0, "ctrlq start idx for dump"); 3608 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count", 3609 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count, 3610 0, "ctrl #entries to dump"); 3611 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump", 3612 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL], 3613 0, t3_dump_txq_ctrl, "A", "dump of the control queue"); 3614 3615 SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_queued", 3616 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL); 3617 SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_flushed", 3618 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL); 3619 SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_bad_csum", 3620 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL); 3621 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt", 3622 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL); 3623 } 3624 3625 /* Now add a node for mac stats. */ 3626 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats", 3627 CTLFLAG_RD, NULL, "MAC statistics"); 3628 poidlist = SYSCTL_CHILDREN(poid); 3629 3630 /* 3631 * We (ab)use the length argument (arg2) to pass on the offset 3632 * of the data that we are interested in. This is only required 3633 * for the quad counters that are updated from the hardware (we 3634 * make sure that we return the latest value). 3635 * sysctl_handle_macstat first updates *all* the counters from 3636 * the hardware, and then returns the latest value of the 3637 * requested counter. Best would be to update only the 3638 * requested counter from hardware, but t3_mac_update_stats() 3639 * hides all the register details and we don't want to dive into 3640 * all that here.
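 *
 * For example, CXGB_SYSCTL_ADD_QUAD(tx_octets) below registers a node
 * whose arg2 is offsetof(struct mac_stats, tx_octets); the handler
 * refreshes the stats and then returns the uint64_t found at that
 * offset from &p->mac.stats.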
3641 */ 3642#define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \ 3643 (CTLTYPE_U64 | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \ 3644 sysctl_handle_macstat, "QU", 0) 3645 CXGB_SYSCTL_ADD_QUAD(tx_octets); 3646 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad); 3647 CXGB_SYSCTL_ADD_QUAD(tx_frames); 3648 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames); 3649 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames); 3650 CXGB_SYSCTL_ADD_QUAD(tx_pause); 3651 CXGB_SYSCTL_ADD_QUAD(tx_deferred); 3652 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions); 3653 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions); 3654 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions); 3655 CXGB_SYSCTL_ADD_QUAD(tx_underrun); 3656 CXGB_SYSCTL_ADD_QUAD(tx_len_errs); 3657 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs); 3658 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral); 3659 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs); 3660 CXGB_SYSCTL_ADD_QUAD(tx_frames_64); 3661 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127); 3662 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255); 3663 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511); 3664 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023); 3665 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518); 3666 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max); 3667 CXGB_SYSCTL_ADD_QUAD(rx_octets); 3668 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad); 3669 CXGB_SYSCTL_ADD_QUAD(rx_frames); 3670 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames); 3671 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames); 3672 CXGB_SYSCTL_ADD_QUAD(rx_pause); 3673 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs); 3674 CXGB_SYSCTL_ADD_QUAD(rx_align_errs); 3675 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs); 3676 CXGB_SYSCTL_ADD_QUAD(rx_data_errs); 3677 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs); 3678 CXGB_SYSCTL_ADD_QUAD(rx_runt); 3679 CXGB_SYSCTL_ADD_QUAD(rx_jabber); 3680 CXGB_SYSCTL_ADD_QUAD(rx_short); 3681 CXGB_SYSCTL_ADD_QUAD(rx_too_long); 3682 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs); 3683 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops); 3684 CXGB_SYSCTL_ADD_QUAD(rx_frames_64); 3685 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127); 3686 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255); 3687 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511); 3688 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023); 3689 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518); 3690 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max); 3691#undef CXGB_SYSCTL_ADD_QUAD 3692 3693#define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \ 3694 CTLFLAG_RD, &mstats->a, 0) 3695 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err); 3696 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err); 3697 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun); 3698 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl); 3699 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss); 3700 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err); 3701 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change); 3702 CXGB_SYSCTL_ADD_ULONG(num_toggled); 3703 CXGB_SYSCTL_ADD_ULONG(num_resets); 3704 CXGB_SYSCTL_ADD_ULONG(link_faults); 3705#undef CXGB_SYSCTL_ADD_ULONG 3706 } 3707} 3708 3709/** 3710 * t3_get_desc - dump an SGE descriptor for debugging purposes 3711 * @qs: the queue set 3712 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx) 3713 * @idx: the descriptor index in the queue 3714 * @data: where to dump the descriptor contents 3715 * 3716 * Dumps the contents of a HW descriptor of an SGE queue. Returns the 3717 * size of the descriptor. 
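 *
 * The caller's @data buffer must be large enough for the descriptor
 * type selected by @qnum: a struct tx_desc, rsp_desc, or rx_desc,
 * respectively.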
3718 */ 3719int 3720t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, 3721 unsigned char *data) 3722{ 3723 if (qnum >= 6) 3724 return (EINVAL); 3725 3726 if (qnum < 3) { 3727 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size) 3728 return (EINVAL); 3729 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc)); 3730 return (sizeof(struct tx_desc)); 3731 } 3732 3733 if (qnum == 3) { 3734 if (!qs->rspq.desc || idx >= qs->rspq.size) 3735 return (EINVAL); 3736 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc)); 3737 return (sizeof(struct rsp_desc)); 3738 } 3739 3740 qnum -= 4; 3741 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size) 3742 return (EINVAL); 3743 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc)); 3744 return (sizeof(struct rx_desc)); 3745} 3746