cxgb_sge.c revision 195006
1226584Sdim/************************************************************************** 2226584Sdim 3226584SdimCopyright (c) 2007-2009, Chelsio Inc. 4226584SdimAll rights reserved. 5226584Sdim 6226584SdimRedistribution and use in source and binary forms, with or without 7226584Sdimmodification, are permitted provided that the following conditions are met: 8226584Sdim 9226584Sdim 1. Redistributions of source code must retain the above copyright notice, 10226584Sdim this list of conditions and the following disclaimer. 11226584Sdim 12226584Sdim 2. Neither the name of the Chelsio Corporation nor the names of its 13226584Sdim contributors may be used to endorse or promote products derived from 14226584Sdim this software without specific prior written permission. 15226584Sdim 16226584SdimTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17226584SdimAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18226584SdimIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19226584SdimARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20226584SdimLIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21226584SdimCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22226584SdimSUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23226584SdimINTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24226584SdimCONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25226584SdimARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26226584SdimPOSSIBILITY OF SUCH DAMAGE. 
27226584Sdim 28226584Sdim***************************************************************************/ 29226584Sdim 30226584Sdim#include <sys/cdefs.h> 31226584Sdim__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_sge.c 195006 2009-06-25 21:50:15Z np $"); 32226584Sdim 33226584Sdim#include <sys/param.h> 34226584Sdim#include <sys/systm.h> 35226584Sdim#include <sys/kernel.h> 36226584Sdim#include <sys/module.h> 37249423Sdim#include <sys/bus.h> 38239462Sdim#include <sys/conf.h> 39239462Sdim#include <machine/bus.h> 40239462Sdim#include <machine/resource.h> 41239462Sdim#include <sys/bus_dma.h> 42239462Sdim#include <sys/rman.h> 43226584Sdim#include <sys/queue.h> 44226584Sdim#include <sys/sysctl.h> 45226584Sdim#include <sys/taskqueue.h> 46226584Sdim 47226584Sdim#include <sys/proc.h> 48226584Sdim#include <sys/sbuf.h> 49226584Sdim#include <sys/sched.h> 50226584Sdim#include <sys/smp.h> 51226584Sdim#include <sys/systm.h> 52226584Sdim#include <sys/syslog.h> 53226584Sdim 54226584Sdim#include <net/bpf.h> 55226584Sdim 56226584Sdim#include <netinet/in_systm.h> 57226584Sdim#include <netinet/in.h> 58226584Sdim#include <netinet/ip.h> 59226584Sdim#include <netinet/tcp.h> 60226584Sdim 61226584Sdim#include <dev/pci/pcireg.h> 62226584Sdim#include <dev/pci/pcivar.h> 63226584Sdim 64226584Sdim#include <vm/vm.h> 65226584Sdim#include <vm/pmap.h> 66226584Sdim 67226584Sdim#include <cxgb_include.h> 68226584Sdim#include <sys/mvec.h> 69226584Sdim 70226584Sdimint txq_fills = 0; 71226584Sdimint multiq_tx_enable = 1; 72226584Sdim 73226584Sdimextern struct sysctl_oid_list sysctl__hw_cxgb_children; 74226584Sdimint cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE; 75226584SdimTUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size); 76226584SdimSYSCTL_UINT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0, 77226584Sdim "size of per-queue mbuf ring"); 78263508Sdim 79226584Sdimstatic int cxgb_tx_coalesce_force = 0; 80263508SdimTUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force); 
81226584SdimSYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW, 82226584Sdim &cxgb_tx_coalesce_force, 0, 83226584Sdim "coalesce small packets into a single work request regardless of ring state"); 84226584Sdim 85226584Sdim#define COALESCE_START_DEFAULT TX_ETH_Q_SIZE>>1 86226584Sdim#define COALESCE_START_MAX (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3)) 87226584Sdim#define COALESCE_STOP_DEFAULT TX_ETH_Q_SIZE>>2 88226584Sdim#define COALESCE_STOP_MIN TX_ETH_Q_SIZE>>5 89226584Sdim#define TX_RECLAIM_DEFAULT TX_ETH_Q_SIZE>>5 90226584Sdim#define TX_RECLAIM_MAX TX_ETH_Q_SIZE>>2 91226584Sdim#define TX_RECLAIM_MIN TX_ETH_Q_SIZE>>6 92226584Sdim 93226584Sdim 94263508Sdimstatic int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT; 95263508SdimTUNABLE_INT("hw.cxgb.tx_coalesce_enable_start", 96226584Sdim &cxgb_tx_coalesce_enable_start); 97226584SdimSYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW, 98226584Sdim &cxgb_tx_coalesce_enable_start, 0, 99226584Sdim "coalesce enable threshold"); 100226584Sdimstatic int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT; 101226584SdimTUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop); 102226584SdimSYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW, 103226584Sdim &cxgb_tx_coalesce_enable_stop, 0, 104249423Sdim "coalesce disable threshold"); 105249423Sdimstatic int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; 106239462SdimTUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold); 107249423SdimSYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW, 108249423Sdim &cxgb_tx_reclaim_threshold, 0, 109249423Sdim "tx cleaning minimum threshold"); 110249423Sdim 111249423Sdim/* 112249423Sdim * XXX don't re-enable this until TOE stops assuming 113239462Sdim * we have an m_ext 114263508Sdim */ 115263508Sdimstatic int recycle_enable = 0; 116226584Sdimint cxgb_ext_freed = 0; 117226584Sdimint cxgb_ext_inited = 0; 118226584Sdimint fl_q_size = 0; 
119226584Sdimint jumbo_q_size = 0; 120226584Sdim 121226584Sdimextern int cxgb_use_16k_clusters; 122239462Sdimextern int nmbjumbo4; 123226584Sdimextern int nmbjumbo9; 124249423Sdimextern int nmbjumbo16; 125249423Sdim 126226584Sdim#define USE_GTS 0 127226584Sdim 128249423Sdim#define SGE_RX_SM_BUF_SIZE 1536 129239462Sdim#define SGE_RX_DROP_THRES 16 130226584Sdim#define SGE_RX_COPY_THRES 128 131226584Sdim 132226584Sdim/* 133226584Sdim * Period of the Tx buffer reclaim timer. This timer does not need to run 134226584Sdim * frequently as Tx buffers are usually reclaimed by new Tx packets. 135226584Sdim */ 136226584Sdim#define TX_RECLAIM_PERIOD (hz >> 1) 137226584Sdim 138226584Sdim/* 139226584Sdim * Values for sge_txq.flags 140226584Sdim */ 141226584Sdimenum { 142239462Sdim TXQ_RUNNING = 1 << 0, /* fetch engine is running */ 143239462Sdim TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */ 144239462Sdim}; 145239462Sdim 146226584Sdimstruct tx_desc { 147226584Sdim uint64_t flit[TX_DESC_FLITS]; 148226584Sdim} __packed; 149226584Sdim 150226584Sdimstruct rx_desc { 151226584Sdim uint32_t addr_lo; 152226584Sdim uint32_t len_gen; 153226584Sdim uint32_t gen2; 154226584Sdim uint32_t addr_hi; 155226584Sdim} __packed;; 156226584Sdim 157226584Sdimstruct rsp_desc { /* response queue descriptor */ 158226584Sdim struct rss_header rss_hdr; 159226584Sdim uint32_t flags; 160239462Sdim uint32_t len_cq; 161239462Sdim uint8_t imm_data[47]; 162263508Sdim uint8_t intr_gen; 163226584Sdim} __packed; 164239462Sdim 165239462Sdim#define RX_SW_DESC_MAP_CREATED (1 << 0) 166239462Sdim#define TX_SW_DESC_MAP_CREATED (1 << 1) 167263508Sdim#define RX_SW_DESC_INUSE (1 << 3) 168239462Sdim#define TX_SW_DESC_MAPPED (1 << 4) 169239462Sdim 170263508Sdim#define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0) 171263508Sdim#define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP) 172239462Sdim#define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP) 173239462Sdim#define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP) 174239462Sdim 
175226584Sdimstruct tx_sw_desc { /* SW state per Tx descriptor */ 176226584Sdim struct mbuf *m; 177226584Sdim bus_dmamap_t map; 178263508Sdim int flags; 179226584Sdim}; 180239462Sdim 181263508Sdimstruct rx_sw_desc { /* SW state per Rx descriptor */ 182263508Sdim caddr_t rxsd_cl; 183239462Sdim struct mbuf *m; 184239462Sdim bus_dmamap_t map; 185226584Sdim int flags; 186226584Sdim}; 187226584Sdim 188226584Sdimstruct txq_state { 189226584Sdim unsigned int compl; 190226584Sdim unsigned int gen; 191226584Sdim unsigned int pidx; 192226584Sdim}; 193226584Sdim 194226584Sdimstruct refill_fl_cb_arg { 195226584Sdim int error; 196226584Sdim bus_dma_segment_t seg; 197226584Sdim int nseg; 198226584Sdim}; 199226584Sdim 200226584Sdim 201226584Sdim/* 202226584Sdim * Maps a number of flits to the number of Tx descriptors that can hold them. 203226584Sdim * The formula is 204226584Sdim * 205226584Sdim * desc = 1 + (flits - 2) / (WR_FLITS - 1). 206226584Sdim * 207226584Sdim * HW allows up to 4 descriptors to be combined into a WR. 
208226584Sdim */ 209226584Sdimstatic uint8_t flit_desc_map[] = { 210226584Sdim 0, 211226584Sdim#if SGE_NUM_GENBITS == 1 212226584Sdim 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 213263508Sdim 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 214226584Sdim 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 215226584Sdim 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 216226584Sdim#elif SGE_NUM_GENBITS == 2 217226584Sdim 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 218263508Sdim 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 219226584Sdim 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 220226584Sdim 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 221263508Sdim#else 222226584Sdim# error "SGE_NUM_GENBITS must be 1 or 2" 223226584Sdim#endif 224226584Sdim}; 225226584Sdim 226226584Sdim#define TXQ_LOCK_ASSERT(qs) mtx_assert(&(qs)->lock, MA_OWNED) 227226584Sdim#define TXQ_TRYLOCK(qs) mtx_trylock(&(qs)->lock) 228226584Sdim#define TXQ_LOCK(qs) mtx_lock(&(qs)->lock) 229226584Sdim#define TXQ_UNLOCK(qs) mtx_unlock(&(qs)->lock) 230226584Sdim#define TXQ_RING_EMPTY(qs) drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) 231239462Sdim#define TXQ_RING_FLUSH(qs) drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) 232226584Sdim#define TXQ_RING_DEQUEUE_COND(qs, func, arg) \ 233226584Sdim drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg) 234226584Sdim#define TXQ_RING_DEQUEUE(qs) \ 235226584Sdim drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) 236226584Sdim 237int cxgb_debug = 0; 238 239static void sge_timer_cb(void *arg); 240static void sge_timer_reclaim(void *arg, int ncount); 241static void sge_txq_reclaim_handler(void *arg, int ncount); 242static void cxgb_start_locked(struct sge_qset *qs); 243 244/* 245 * XXX need to cope with bursty scheduling by looking at a wider 246 * window than we are now for determining the need for coalescing 247 * 248 */ 249static __inline uint64_t 250check_pkt_coalesce(struct sge_qset *qs) 251{ 252 struct adapter *sc; 253 struct sge_txq *txq; 254 
uint8_t *fill; 255 256 if (__predict_false(cxgb_tx_coalesce_force)) 257 return (1); 258 txq = &qs->txq[TXQ_ETH]; 259 sc = qs->port->adapter; 260 fill = &sc->tunq_fill[qs->idx]; 261 262 if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX) 263 cxgb_tx_coalesce_enable_start = COALESCE_START_MAX; 264 if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN) 265 cxgb_tx_coalesce_enable_start = COALESCE_STOP_MIN; 266 /* 267 * if the hardware transmit queue is more than 1/8 full 268 * we mark it as coalescing - we drop back from coalescing 269 * when we go below 1/32 full and there are no packets enqueued, 270 * this provides us with some degree of hysteresis 271 */ 272 if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) && 273 TXQ_RING_EMPTY(qs) && (qs->coalescing == 0)) 274 *fill = 0; 275 else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start)) 276 *fill = 1; 277 278 return (sc->tunq_coalesce); 279} 280 281#ifdef __LP64__ 282static void 283set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo) 284{ 285 uint64_t wr_hilo; 286#if _BYTE_ORDER == _LITTLE_ENDIAN 287 wr_hilo = wr_hi; 288 wr_hilo |= (((uint64_t)wr_lo)<<32); 289#else 290 wr_hilo = wr_lo; 291 wr_hilo |= (((uint64_t)wr_hi)<<32); 292#endif 293 wrp->wrh_hilo = wr_hilo; 294} 295#else 296static void 297set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo) 298{ 299 300 wrp->wrh_hi = wr_hi; 301 wmb(); 302 wrp->wrh_lo = wr_lo; 303} 304#endif 305 306struct coalesce_info { 307 int count; 308 int nbytes; 309}; 310 311static int 312coalesce_check(struct mbuf *m, void *arg) 313{ 314 struct coalesce_info *ci = arg; 315 int *count = &ci->count; 316 int *nbytes = &ci->nbytes; 317 318 if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) && 319 (*count < 7) && (m->m_next == NULL))) { 320 *count += 1; 321 *nbytes += m->m_len; 322 return (1); 323 } 324 return (0); 325} 326 327static struct mbuf * 328cxgb_dequeue(struct sge_qset *qs) 329{ 330 struct mbuf *m, *m_head, 
*m_tail; 331 struct coalesce_info ci; 332 333 334 if (check_pkt_coalesce(qs) == 0) 335 return TXQ_RING_DEQUEUE(qs); 336 337 m_head = m_tail = NULL; 338 ci.count = ci.nbytes = 0; 339 do { 340 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci); 341 if (m_head == NULL) { 342 m_tail = m_head = m; 343 } else if (m != NULL) { 344 m_tail->m_nextpkt = m; 345 m_tail = m; 346 } 347 } while (m != NULL); 348 if (ci.count > 7) 349 panic("trying to coalesce %d packets in to one WR", ci.count); 350 return (m_head); 351} 352 353/** 354 * reclaim_completed_tx - reclaims completed Tx descriptors 355 * @adapter: the adapter 356 * @q: the Tx queue to reclaim completed descriptors from 357 * 358 * Reclaims Tx descriptors that the SGE has indicated it has processed, 359 * and frees the associated buffers if possible. Called with the Tx 360 * queue's lock held. 361 */ 362static __inline int 363reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue) 364{ 365 struct sge_txq *q = &qs->txq[queue]; 366 int reclaim = desc_reclaimable(q); 367 368 if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) || 369 (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN)) 370 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; 371 372 if (reclaim < reclaim_min) 373 return (0); 374 375 mtx_assert(&qs->lock, MA_OWNED); 376 if (reclaim > 0) { 377 t3_free_tx_desc(qs, reclaim, queue); 378 q->cleaned += reclaim; 379 q->in_use -= reclaim; 380 } 381 if (isset(&qs->txq_stopped, TXQ_ETH)) 382 clrbit(&qs->txq_stopped, TXQ_ETH); 383 384 return (reclaim); 385} 386 387/** 388 * should_restart_tx - are there enough resources to restart a Tx queue? 389 * @q: the Tx queue 390 * 391 * Checks if there are enough descriptors to restart a suspended Tx queue. 
392 */ 393static __inline int 394should_restart_tx(const struct sge_txq *q) 395{ 396 unsigned int r = q->processed - q->cleaned; 397 398 return q->in_use - r < (q->size >> 1); 399} 400 401/** 402 * t3_sge_init - initialize SGE 403 * @adap: the adapter 404 * @p: the SGE parameters 405 * 406 * Performs SGE initialization needed every time after a chip reset. 407 * We do not initialize any of the queue sets here, instead the driver 408 * top-level must request those individually. We also do not enable DMA 409 * here, that should be done after the queues have been set up. 410 */ 411void 412t3_sge_init(adapter_t *adap, struct sge_params *p) 413{ 414 u_int ctrl, ups; 415 416 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ 417 418 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | 419 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN | 420 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | 421 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING; 422#if SGE_NUM_GENBITS == 1 423 ctrl |= F_EGRGENCTRL; 424#endif 425 if (adap->params.rev > 0) { 426 if (!(adap->flags & (USING_MSIX | USING_MSI))) 427 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; 428 } 429 t3_write_reg(adap, A_SG_CONTROL, ctrl); 430 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | 431 V_LORCQDRBTHRSH(512)); 432 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); 433 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | 434 V_TIMEOUT(200 * core_ticks_per_usec(adap))); 435 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 436 adap->params.rev < T3_REV_C ? 
1000 : 500); 437 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); 438 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); 439 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); 440 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); 441 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); 442} 443 444 445/** 446 * sgl_len - calculates the size of an SGL of the given capacity 447 * @n: the number of SGL entries 448 * 449 * Calculates the number of flits needed for a scatter/gather list that 450 * can hold the given number of entries. 451 */ 452static __inline unsigned int 453sgl_len(unsigned int n) 454{ 455 return ((3 * n) / 2 + (n & 1)); 456} 457 458/** 459 * get_imm_packet - return the next ingress packet buffer from a response 460 * @resp: the response descriptor containing the packet data 461 * 462 * Return a packet containing the immediate data of the given response. 463 */ 464static int 465get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m) 466{ 467 468 m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE; 469 m->m_ext.ext_buf = NULL; 470 m->m_ext.ext_type = 0; 471 memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE); 472 return (0); 473} 474 475static __inline u_int 476flits_to_desc(u_int n) 477{ 478 return (flit_desc_map[n]); 479} 480 481#define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \ 482 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \ 483 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \ 484 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \ 485 F_HIRCQPARITYERROR) 486#define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR) 487#define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \ 488 F_RSPQDISABLED) 489 490/** 491 * t3_sge_err_intr_handler - SGE async event interrupt handler 492 * @adapter: the adapter 493 * 494 * Interrupt handler for SGE asynchronous (non-data) events. 
495 */ 496void 497t3_sge_err_intr_handler(adapter_t *adapter) 498{ 499 unsigned int v, status; 500 501 status = t3_read_reg(adapter, A_SG_INT_CAUSE); 502 if (status & SGE_PARERR) 503 CH_ALERT(adapter, "SGE parity error (0x%x)\n", 504 status & SGE_PARERR); 505 if (status & SGE_FRAMINGERR) 506 CH_ALERT(adapter, "SGE framing error (0x%x)\n", 507 status & SGE_FRAMINGERR); 508 if (status & F_RSPQCREDITOVERFOW) 509 CH_ALERT(adapter, "SGE response queue credit overflow\n"); 510 511 if (status & F_RSPQDISABLED) { 512 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); 513 514 CH_ALERT(adapter, 515 "packet delivered to disabled response queue (0x%x)\n", 516 (v >> S_RSPQ0DISABLED) & 0xff); 517 } 518 519 t3_write_reg(adapter, A_SG_INT_CAUSE, status); 520 if (status & SGE_FATALERR) 521 t3_fatal_err(adapter); 522} 523 524void 525t3_sge_prep(adapter_t *adap, struct sge_params *p) 526{ 527 int i, nqsets; 528 529 nqsets = min(SGE_QSETS, mp_ncpus*4); 530 531 fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE); 532 533 while (!powerof2(fl_q_size)) 534 fl_q_size--; 535#if __FreeBSD_version >= 700111 536 if (cxgb_use_16k_clusters) 537 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE); 538 else 539 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE); 540#else 541 jumbo_q_size = min(nmbjumbo4/(3*nqsets), JUMBO_Q_SIZE); 542#endif 543 while (!powerof2(jumbo_q_size)) 544 jumbo_q_size--; 545 546 /* XXX Does ETHER_ALIGN need to be accounted for here? 
*/ 547 p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data); 548 549 for (i = 0; i < SGE_QSETS; ++i) { 550 struct qset_params *q = p->qset + i; 551 552 if (adap->params.nports > 2) { 553 q->coalesce_usecs = 50; 554 } else { 555#ifdef INVARIANTS 556 q->coalesce_usecs = 10; 557#else 558 q->coalesce_usecs = 5; 559#endif 560 } 561 q->polling = 0; 562 q->rspq_size = RSPQ_Q_SIZE; 563 q->fl_size = fl_q_size; 564 q->jumbo_size = jumbo_q_size; 565 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; 566 q->txq_size[TXQ_OFLD] = 1024; 567 q->txq_size[TXQ_CTRL] = 256; 568 q->cong_thres = 0; 569 } 570} 571 572int 573t3_sge_alloc(adapter_t *sc) 574{ 575 576 /* The parent tag. */ 577 if (bus_dma_tag_create( NULL, /* parent */ 578 1, 0, /* algnmnt, boundary */ 579 BUS_SPACE_MAXADDR, /* lowaddr */ 580 BUS_SPACE_MAXADDR, /* highaddr */ 581 NULL, NULL, /* filter, filterarg */ 582 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 583 BUS_SPACE_UNRESTRICTED, /* nsegments */ 584 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 585 0, /* flags */ 586 NULL, NULL, /* lock, lockarg */ 587 &sc->parent_dmat)) { 588 device_printf(sc->dev, "Cannot allocate parent DMA tag\n"); 589 return (ENOMEM); 590 } 591 592 /* 593 * DMA tag for normal sized RX frames 594 */ 595 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR, 596 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, 597 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) { 598 device_printf(sc->dev, "Cannot allocate RX DMA tag\n"); 599 return (ENOMEM); 600 } 601 602 /* 603 * DMA tag for jumbo sized RX frames. 604 */ 605 if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR, 606 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES, 607 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) { 608 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n"); 609 return (ENOMEM); 610 } 611 612 /* 613 * DMA tag for TX frames. 
614 */ 615 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR, 616 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 617 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 618 NULL, NULL, &sc->tx_dmat)) { 619 device_printf(sc->dev, "Cannot allocate TX DMA tag\n"); 620 return (ENOMEM); 621 } 622 623 return (0); 624} 625 626int 627t3_sge_free(struct adapter * sc) 628{ 629 630 if (sc->tx_dmat != NULL) 631 bus_dma_tag_destroy(sc->tx_dmat); 632 633 if (sc->rx_jumbo_dmat != NULL) 634 bus_dma_tag_destroy(sc->rx_jumbo_dmat); 635 636 if (sc->rx_dmat != NULL) 637 bus_dma_tag_destroy(sc->rx_dmat); 638 639 if (sc->parent_dmat != NULL) 640 bus_dma_tag_destroy(sc->parent_dmat); 641 642 return (0); 643} 644 645void 646t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) 647{ 648 649 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U); 650 qs->rspq.polling = 0 /* p->polling */; 651} 652 653#if !defined(__i386__) && !defined(__amd64__) 654static void 655refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 656{ 657 struct refill_fl_cb_arg *cb_arg = arg; 658 659 cb_arg->error = error; 660 cb_arg->seg = segs[0]; 661 cb_arg->nseg = nseg; 662 663} 664#endif 665/** 666 * refill_fl - refill an SGE free-buffer list 667 * @sc: the controller softc 668 * @q: the free-list to refill 669 * @n: the number of new buffers to allocate 670 * 671 * (Re)populate an SGE free-buffer list with up to @n new packet buffers. 672 * The caller must assure that @n does not exceed the queue's capacity. 
673 */ 674static void 675refill_fl(adapter_t *sc, struct sge_fl *q, int n) 676{ 677 struct rx_sw_desc *sd = &q->sdesc[q->pidx]; 678 struct rx_desc *d = &q->desc[q->pidx]; 679 struct refill_fl_cb_arg cb_arg; 680 struct mbuf *m; 681 caddr_t cl; 682 int err, count = 0; 683 684 cb_arg.error = 0; 685 while (n--) { 686 /* 687 * We only allocate a cluster, mbuf allocation happens after rx 688 */ 689 if (q->zone == zone_pack) { 690 if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL) 691 break; 692 cl = m->m_ext.ext_buf; 693 } else { 694 if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL) 695 break; 696 if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { 697 uma_zfree(q->zone, cl); 698 break; 699 } 700 } 701 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) { 702 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) { 703 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); 704 uma_zfree(q->zone, cl); 705 goto done; 706 } 707 sd->flags |= RX_SW_DESC_MAP_CREATED; 708 } 709#if !defined(__i386__) && !defined(__amd64__) 710 err = bus_dmamap_load(q->entry_tag, sd->map, 711 cl, q->buf_size, refill_fl_cb, &cb_arg, 0); 712 713 if (err != 0 || cb_arg.error) { 714 if (q->zone == zone_pack) 715 uma_zfree(q->zone, cl); 716 m_free(m); 717 goto done; 718 } 719#else 720 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl); 721#endif 722 sd->flags |= RX_SW_DESC_INUSE; 723 sd->rxsd_cl = cl; 724 sd->m = m; 725 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff); 726 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff); 727 d->len_gen = htobe32(V_FLD_GEN1(q->gen)); 728 d->gen2 = htobe32(V_FLD_GEN2(q->gen)); 729 730 d++; 731 sd++; 732 733 if (++q->pidx == q->size) { 734 q->pidx = 0; 735 q->gen ^= 1; 736 sd = q->sdesc; 737 d = q->desc; 738 } 739 q->credits++; 740 count++; 741 } 742 743done: 744 if (count) 745 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 746} 747 748 749/** 750 * free_rx_bufs - free the Rx buffers on an SGE free list 751 * @sc: the 
controle softc 752 * @q: the SGE free list to clean up 753 * 754 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from 755 * this queue should be stopped before calling this function. 756 */ 757static void 758free_rx_bufs(adapter_t *sc, struct sge_fl *q) 759{ 760 u_int cidx = q->cidx; 761 762 while (q->credits--) { 763 struct rx_sw_desc *d = &q->sdesc[cidx]; 764 765 if (d->flags & RX_SW_DESC_INUSE) { 766 bus_dmamap_unload(q->entry_tag, d->map); 767 bus_dmamap_destroy(q->entry_tag, d->map); 768 if (q->zone == zone_pack) { 769 m_init(d->m, zone_pack, MCLBYTES, 770 M_NOWAIT, MT_DATA, M_EXT); 771 uma_zfree(zone_pack, d->m); 772 } else { 773 m_init(d->m, zone_mbuf, MLEN, 774 M_NOWAIT, MT_DATA, 0); 775 uma_zfree(zone_mbuf, d->m); 776 uma_zfree(q->zone, d->rxsd_cl); 777 } 778 } 779 780 d->rxsd_cl = NULL; 781 d->m = NULL; 782 if (++cidx == q->size) 783 cidx = 0; 784 } 785} 786 787static __inline void 788__refill_fl(adapter_t *adap, struct sge_fl *fl) 789{ 790 refill_fl(adap, fl, min(16U, fl->size - fl->credits)); 791} 792 793static __inline void 794__refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max) 795{ 796 if ((fl->size - fl->credits) < max) 797 refill_fl(adap, fl, min(max, fl->size - fl->credits)); 798} 799 800/** 801 * recycle_rx_buf - recycle a receive buffer 802 * @adapter: the adapter 803 * @q: the SGE free list 804 * @idx: index of buffer to recycle 805 * 806 * Recycles the specified buffer on the given free list by adding it at 807 * the next available slot on the list. 808 */ 809static void 810recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx) 811{ 812 struct rx_desc *from = &q->desc[idx]; 813 struct rx_desc *to = &q->desc[q->pidx]; 814 815 q->sdesc[q->pidx] = q->sdesc[idx]; 816 to->addr_lo = from->addr_lo; // already big endian 817 to->addr_hi = from->addr_hi; // likewise 818 wmb(); /* necessary ? 
*/ 819 to->len_gen = htobe32(V_FLD_GEN1(q->gen)); 820 to->gen2 = htobe32(V_FLD_GEN2(q->gen)); 821 q->credits++; 822 823 if (++q->pidx == q->size) { 824 q->pidx = 0; 825 q->gen ^= 1; 826 } 827 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 828} 829 830static void 831alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) 832{ 833 uint32_t *addr; 834 835 addr = arg; 836 *addr = segs[0].ds_addr; 837} 838 839static int 840alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, 841 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag, 842 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag) 843{ 844 size_t len = nelem * elem_size; 845 void *s = NULL; 846 void *p = NULL; 847 int err; 848 849 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0, 850 BUS_SPACE_MAXADDR_32BIT, 851 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, 852 len, 0, NULL, NULL, tag)) != 0) { 853 device_printf(sc->dev, "Cannot allocate descriptor tag\n"); 854 return (ENOMEM); 855 } 856 857 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT, 858 map)) != 0) { 859 device_printf(sc->dev, "Cannot allocate descriptor memory\n"); 860 return (ENOMEM); 861 } 862 863 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0); 864 bzero(p, len); 865 *(void **)desc = p; 866 867 if (sw_size) { 868 len = nelem * sw_size; 869 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO); 870 *(void **)sdesc = s; 871 } 872 if (parent_entry_tag == NULL) 873 return (0); 874 875 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0, 876 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, 877 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 878 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 879 NULL, NULL, entry_tag)) != 0) { 880 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n"); 881 return (ENOMEM); 882 } 883 return (0); 884} 885 886static void 887sge_slow_intr_handler(void *arg, int ncount) 888{ 889 adapter_t *sc = arg; 890 891 t3_slow_intr_handler(sc); 892} 893 894/** 895 * 
sge_timer_cb - perform periodic maintenance of an SGE qset 896 * @data: the SGE queue set to maintain 897 * 898 * Runs periodically from a timer to perform maintenance of an SGE queue 899 * set. It performs two tasks: 900 * 901 * a) Cleans up any completed Tx descriptors that may still be pending. 902 * Normal descriptor cleanup happens when new packets are added to a Tx 903 * queue so this timer is relatively infrequent and does any cleanup only 904 * if the Tx queue has not seen any new packets in a while. We make a 905 * best effort attempt to reclaim descriptors, in that we don't wait 906 * around if we cannot get a queue's lock (which most likely is because 907 * someone else is queueing new packets and so will also handle the clean 908 * up). Since control queues use immediate data exclusively we don't 909 * bother cleaning them up here. 910 * 911 * b) Replenishes Rx queues that have run out due to memory shortage. 912 * Normally new Rx buffers are added when existing ones are consumed but 913 * when out of memory a queue can become empty. We try to add only a few 914 * buffers here, the queue will be replenished fully as these new buffers 915 * are used up if memory shortage has subsided. 916 * 917 * c) Return coalesced response queue credits in case a response queue is 918 * starved. 919 * 920 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell 921 * fifo overflows and the FW doesn't implement any recovery scheme yet. 
922 */ 923static void 924sge_timer_cb(void *arg) 925{ 926 adapter_t *sc = arg; 927 if ((sc->flags & USING_MSIX) == 0) { 928 929 struct port_info *pi; 930 struct sge_qset *qs; 931 struct sge_txq *txq; 932 int i, j; 933 int reclaim_ofl, refill_rx; 934 935 if (sc->open_device_map == 0) 936 return; 937 938 for (i = 0; i < sc->params.nports; i++) { 939 pi = &sc->port[i]; 940 for (j = 0; j < pi->nqsets; j++) { 941 qs = &sc->sge.qs[pi->first_qset + j]; 942 txq = &qs->txq[0]; 943 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; 944 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 945 (qs->fl[1].credits < qs->fl[1].size)); 946 if (reclaim_ofl || refill_rx) { 947 taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task); 948 break; 949 } 950 } 951 } 952 } 953 954 if (sc->params.nports > 2) { 955 int i; 956 957 for_each_port(sc, i) { 958 struct port_info *pi = &sc->port[i]; 959 960 t3_write_reg(sc, A_SG_KDOORBELL, 961 F_SELEGRCNTX | 962 (FW_TUNNEL_SGEEC_START + pi->first_qset)); 963 } 964 } 965 if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) && 966 sc->open_device_map != 0) 967 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 968} 969 970/* 971 * This is meant to be a catch-all function to keep sge state private 972 * to sge.c 973 * 974 */ 975int 976t3_sge_init_adapter(adapter_t *sc) 977{ 978 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE); 979 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 980 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc); 981 return (0); 982} 983 984int 985t3_sge_reset_adapter(adapter_t *sc) 986{ 987 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 988 return (0); 989} 990 991int 992t3_sge_init_port(struct port_info *pi) 993{ 994 TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi); 995 return (0); 996} 997 998/** 999 * refill_rspq - replenish an SGE response queue 1000 * @adapter: the adapter 1001 * @q: the response queue to replenish 1002 * 
@credits: how many new responses to make available
 *
 * Replenishes a response queue by making the supplied number of responses
 * available to HW.  This is a single credit-return register write.
 */
static __inline void
refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
{

	/* mbufs are allocated on demand when a rspq entry is processed. */
	t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
	    V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
}

/*
 * Taskqueue handler: reclaim up to 16 completed Tx descriptors from each
 * of a queue set's three Tx queues.
 */
static void
sge_txq_reclaim_handler(void *arg, int ncount)
{
	struct sge_qset *qs = arg;
	int i;

	/* i ranges over TXQ_ETH, TXQ_OFLD and TXQ_CTRL */
	for (i = 0; i < 3; i++)
		reclaim_completed_tx(qs, 16, i);
}

/*
 * Per-port maintenance task scheduled from sge_timer_cb() (non-MSI-X
 * only): reclaims offload Tx descriptors, tops up free lists that have
 * run low, and returns a credit to any starved response queue.
 */
static void
sge_timer_reclaim(void *arg, int ncount)
{
	struct port_info *pi = arg;
	int i, nqsets = pi->nqsets;
	adapter_t *sc = pi->adapter;
	struct sge_qset *qs;
	struct mtx *lock;

	KASSERT((sc->flags & USING_MSIX) == 0,
	    ("can't call timer reclaim for msi-x"));

	for (i = 0; i < nqsets; i++) {
		qs = &sc->sge.qs[pi->first_qset + i];

		reclaim_completed_tx(qs, 16, TXQ_OFLD);
		/*
		 * NOTE(review): the KASSERT above guarantees !USING_MSIX,
		 * so this conditional always selects qs[0]'s rspq lock.
		 */
		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
		    &sc->sge.qs[0].rspq.lock;

		/* best effort: skip this qset if someone holds the lock */
		if (mtx_trylock(lock)) {
			/* XXX currently assume that we are *NOT* polling */
			uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);

			/* keep a 16-buffer margin below full on each FL */
			if (qs->fl[0].credits < qs->fl[0].size - 16)
				__refill_fl(sc, &qs->fl[0]);
			if (qs->fl[1].credits < qs->fl[1].size - 16)
				__refill_fl(sc, &qs->fl[1]);

			if (status & (1 << qs->rspq.cntxt_id)) {
				/* rspq starved: return one credit and clear
				 * its starvation status bit */
				if (qs->rspq.credits) {
					refill_rspq(sc, &qs->rspq, 1);
					qs->rspq.credits--;
					t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
					    1 << qs->rspq.cntxt_id);
				}
			}
			mtx_unlock(lock);
		}
	}
}

/**
 * init_qset_cntxt - initialize an SGE queue set context info
 * @qs: the queue set
 * @id: the queue set id
 *
 * Initializes the TIDs and context ids for the queues of a queue set.
 */
static void
init_qset_cntxt(struct sge_qset *qs, u_int id)
{

	qs->rspq.cntxt_id = id;
	/* each queue set owns two free-list contexts */
	qs->fl[0].cntxt_id = 2 * id;
	qs->fl[1].cntxt_id = 2 * id + 1;
	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;

	mbufq_init(&qs->txq[TXQ_ETH].sendq);
	mbufq_init(&qs->txq[TXQ_OFLD].sendq);
	mbufq_init(&qs->txq[TXQ_CTRL].sendq);
}


/*
 * Advance a Tx queue's producer state by ndesc descriptors and record
 * the pre-advance state (pidx, generation, completion-request flag) in
 * *txqs for the caller to use when building the work request.
 */
static void
txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
{
	txq->in_use += ndesc;
	/*
	 * XXX we don't handle stopping of queue
	 * presumably start handles this when we bump against the end
	 */
	txqs->gen = txq->gen;
	txq->unacked += ndesc;
	/* ask HW for a completion once every 32 unacknowledged descriptors */
	txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
	txq->unacked &= 31;
	txqs->pidx = txq->pidx;
	txq->pidx += ndesc;
#ifdef INVARIANTS
	if (((txqs->pidx > txq->cidx) &&
	    (txq->pidx < txqs->pidx) &&
	    (txq->pidx >= txq->cidx)) ||
	    ((txqs->pidx < txq->cidx) &&
	    (txq->pidx >= txq->cidx)) ||
	    ((txqs->pidx < txq->cidx) &&
	    (txq->cidx < txqs->pidx)))
		panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
		    txqs->pidx, txq->pidx, txq->cidx);
#endif
	if (txq->pidx >= txq->size) {
		/* producer wrapped: flip the generation bit */
		txq->pidx -= txq->size;
		txq->gen ^= 1;
	}

}

/**
 * calc_tx_descs - calculate the number of Tx descriptors for a packet
 * @m: the packet mbufs
 * @nsegs: the number of segments
 *
 * Returns the number of Tx descriptors needed for the given Ethernet
 * packet.  Ethernet packets require addition of WR and CPL headers.
 * Packets small enough to be sent as immediate data need exactly one
 * descriptor.
 */
static __inline unsigned int
calc_tx_descs(const struct mbuf *m, int nsegs)
{
	unsigned int flits;

	if (m->m_pkthdr.len <= PIO_LEN)
		return 1;

	/* SGL plus 2 flits of WR/CPL header */
	flits = sgl_len(nsegs) + 2;
#ifdef TSO_SUPPORTED
	if (m->m_pkthdr.csum_flags & CSUM_TSO)
		flits++;
#endif
	return flits_to_desc(flits);
}

/*
 * DMA-map an mbuf chain for transmit, collapsing/defragmenting it if it
 * has too many segments.  On success the segment list is in segs/nsegs
 * and the sw descriptor is marked mapped.  On failure the chain is freed
 * and *m is set to NULL (except for ENOMEM, where the caller may retry).
 */
static unsigned int
busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
    struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
{
	struct mbuf *m0;
	int err, pktlen, pass = 0;
	bus_dma_tag_t tag = txq->entry_tag;

retry:
	err = 0;
	m0 = *m;
	pktlen = m0->m_pkthdr.len;
#if defined(__i386__) || defined(__amd64__)
	if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) {
		goto done;
	} else
#endif
		err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0);

	if (err == 0) {
		goto done;
	}
	if (err == EFBIG && pass == 0) {
		pass = 1;
		/* Too many segments, try to defrag */
		m0 = m_defrag(m0, M_DONTWAIT);
		if (m0 == NULL) {
			m_freem(*m);
			*m = NULL;
			return (ENOBUFS);
		}
		*m = m0;
		goto retry;
	} else if (err == ENOMEM) {
		/* transient: leave the mbuf alone so the caller can retry */
		return (err);
	}
	if (err) {
		if (cxgb_debug)
			printf("map failure err=%d pktlen=%d\n", err, pktlen);
		m_freem(m0);
		*m = NULL;
		return (err);
	}
done:
#if !defined(__i386__) && !defined(__amd64__)
	bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE);
#endif
	txsd->flags |= TX_SW_DESC_MAPPED;

	return (0);
}

/**
 * make_sgl - populate a scatter/gather list for a packet
 * @sgp: the SGL to populate
 * @segs: the packet dma segments
 * @nsegs: the number of segments
 *
 * Generates a scatter/gather list for the buffers that make up a packet
 * and returns the SGL size in 8-byte words.  The caller must size the SGL
 * appropriately.  Each sg_ent holds two (len, addr) pairs; idx toggles
 * between the two slots.
 */
static __inline void
make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
{
	int i, idx;

	for (idx = 0, i = 0; i < nsegs; i++) {
		/*
		 * firmware doesn't like empty segments
		 */
		if (segs[i].ds_len == 0)
			continue;
		if (i && idx == 0)
			++sgp;

		sgp->len[idx] = htobe32(segs[i].ds_len);
		sgp->addr[idx] = htobe64(segs[i].ds_addr);
		idx ^= 1;
	}

	/* zero the unused second half of the last sg_ent, if any */
	if (idx) {
		sgp->len[idx] = 0;
		sgp->addr[idx] = 0;
	}
}

/**
 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 * @adap: the adapter
 * @q: the Tx queue
 *
 * Ring the doorbell if a Tx queue is asleep.  There is a natural race,
 * where the HW is going to sleep just after we checked, however,
 * then the interrupt handler will detect the outstanding TX packet
 * and ring the doorbell for us.
 *
 * When GTS is disabled we unconditionally ring the doorbell.
 */
static __inline void
check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
{
#if USE_GTS
	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
		set_bit(TXQ_LAST_PKT_DB, &q->flags);
#ifdef T3_TRACE
		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
		    q->cntxt_id);
#endif
		t3_write_reg(adap, A_SG_KDOORBELL,
		    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	}
#else
	wmb();			/* write descriptors before telling HW */
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
#endif
}

/*
 * Write the generation bit into the last flit of a Tx descriptor.
 * Only needed when the SGE is configured for two generation bits.
 */
static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
#endif
}

/**
 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
 * @ndesc: number of Tx descriptors spanned by the SGL
 * @txd: first Tx descriptor to be written
 * @txqs: txq state (generation and producer index)
 * @txq: the SGE Tx queue
 * @sgl: the SGL
 * @flits: number of flits to the start of the SGL in the first descriptor
 * @sgl_flits: the SGL size in flits
 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
 *
 * Write a work request header and an associated SGL.  If the SGL is
 * small enough to fit into one Tx descriptor it has already been written
 * and we just need to write the WR header.  Otherwise we distribute the
 * SGL across the number of descriptors it spans.
 */
static void
write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
    const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
    unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
{

	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
	struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];

	if (__predict_true(ndesc == 1)) {
		/* SGL already written by caller; just emit the header */
		set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi,
		    htonl(V_WR_LEN(flits + sgl_flits) |
		    V_WR_GEN(txqs->gen)) | wr_lo);
		/* XXX gen? */
		wr_gen2(txd, txqs->gen);

	} else {
		unsigned int ogen = txqs->gen;
		const uint64_t *fp = (const uint64_t *)sgl;
		struct work_request_hdr *wp = wrp;

		/* first descriptor: SOP set, EOP added on the last one */
		wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi;

		while (sgl_flits) {
			unsigned int avail = WR_FLITS - flits;

			if (avail > sgl_flits)
				avail = sgl_flits;
			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
			sgl_flits -= avail;
			ndesc--;
			if (!sgl_flits)
				break;

			fp += avail;
			txd++;
			txsd++;
			if (++txqs->pidx == txq->size) {
				txqs->pidx = 0;
				txqs->gen ^= 1;
				txd = txq->desc;
				txsd = txq->sdesc;
			}

			/*
			 * when the head of the mbuf chain
			 * is freed all clusters will be freed
			 * with it
			 */
			wrp = (struct work_request_hdr *)txd;
			wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
			    V_WR_SGLSFLT(1)) | wr_hi;
			wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
			    sgl_flits + 1)) |
			    V_WR_GEN(txqs->gen)) | wr_lo;
			wr_gen2(txd, txqs->gen);
			flits = 1;
		}
		wrp->wrh_hi |= htonl(F_WR_EOP);
		wmb();
		/* complete the first WR last, with its original generation,
		 * so HW never sees a partially written request */
		wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
		wr_gen2((struct tx_desc *)wp, ogen);
	}
}

/* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
#define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)

#ifdef VLAN_SUPPORTED
#define GET_VTAG(cntrl, m) \
do { \
	if ((m)->m_flags & M_VLANTAG)					            \
		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
} while (0)

#else
#define GET_VTAG(cntrl, m)
#endif

/*
 * Encapsulate an outbound Ethernet packet (or chain of coalesced
 * packets linked via m_nextpkt) into SGE Tx descriptors.  Handles
 * three WR formats: batched CPL_TX_PKT entries for coalesced chains,
 * CPL_TX_PKT_LSO for TSO, and plain CPL_TX_PKT (via PIO for packets
 * <= PIO_LEN, via SGL otherwise).  Called with the qset lock held.
 */
static int
t3_encap(struct sge_qset *qs, struct mbuf **m)
{
	adapter_t *sc;
	struct mbuf *m0;
	struct sge_txq *txq;
	struct txq_state txqs;
	struct port_info *pi;
	unsigned int ndesc, flits, cntrl, mlen;
	int err, nsegs, tso_info = 0;

	struct work_request_hdr *wrp;
	struct tx_sw_desc *txsd;
	struct sg_ent *sgp, *sgl;
	uint32_t wr_hi, wr_lo, sgl_flits;
	bus_dma_segment_t segs[TX_MAX_SEGS];

	struct tx_desc *txd;

	pi = qs->port;
	sc = pi->adapter;
	txq = &qs->txq[TXQ_ETH];
	txd = &txq->desc[txq->pidx];
	txsd = &txq->sdesc[txq->pidx];
	sgl = txq->txq_sgl;

	prefetch(txd);
	m0 = *m;

	DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset);
	DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan);

	mtx_assert(&qs->lock, MA_OWNED);
	cntrl = V_TXPKT_INTF(pi->txpkt_intf);
	KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));

#ifdef VLAN_SUPPORTED
	if (m0->m_nextpkt == NULL && m0->m_next != NULL &&
	    m0->m_pkthdr.csum_flags & (CSUM_TSO))
		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
#endif
	if (m0->m_nextpkt != NULL) {
		/* coalesced chain: one segment per packet, single WR */
		busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
		ndesc = 1;
		mlen = 0;
	} else {
		if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
		    &m0, segs, &nsegs))) {
			if (cxgb_debug)
				printf("failed ... err=%d\n", err);
			return (err);
		}
		mlen = m0->m_pkthdr.len;
		ndesc = calc_tx_descs(m0, nsegs);
	}
	txq_prod(txq, ndesc, &txqs);

	KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
	txsd->m = m0;

	if (m0->m_nextpkt != NULL) {
		/* batch WR: up to 7 coalesced packets, 2 flits per entry */
		struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
		int i, fidx;

		if (nsegs > 7)
			panic("trying to coalesce %d packets in to one WR", nsegs);
		txq->txq_coalesced += nsegs;
		wrp = (struct work_request_hdr *)txd;
		flits = nsegs*2 + 1;

		for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
			struct cpl_tx_pkt_batch_entry *cbe;
			uint64_t flit;
			uint32_t *hflit = (uint32_t *)&flit;
			int cflags = m0->m_pkthdr.csum_flags;

			cntrl = V_TXPKT_INTF(pi->txpkt_intf);
			GET_VTAG(cntrl, m0);
			cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
			if (__predict_false(!(cflags & CSUM_IP)))
				cntrl |= F_TXPKT_IPCSUM_DIS;
			if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP))))
				cntrl |= F_TXPKT_L4CSUM_DIS;

			hflit[0] = htonl(cntrl);
			hflit[1] = htonl(segs[i].ds_len | 0x80000000);
			flit |= htobe64(1 << 24);
			cbe = &cpl_batch->pkt_entry[i];
			cbe->cntrl = hflit[0];
			cbe->len = hflit[1];
			cbe->addr = htobe64(segs[i].ds_addr);
		}

		wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) |
		    htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
		wr_lo = htonl(V_WR_LEN(flits) |
		    V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
		set_wr_hdr(wrp, wr_hi, wr_lo);
		wmb();
		wr_gen2(txd, txqs.gen);
		check_ring_tx_db(sc, txq);
		return (0);
	} else if (tso_info) {
		/* TSO: build a CPL_TX_PKT_LSO header from the parsed
		 * Ethernet/IP/TCP headers */
		int min_size = TCPPKTHDRSIZE, eth_type, tagged;
		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
		struct ip *ip;
		struct tcphdr *tcp;
		char *pkthdr;

		txd->flit[2] = 0;
		GET_VTAG(cntrl, m0);
		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
		hdr->cntrl = htonl(cntrl);
		hdr->len = htonl(mlen | 0x80000000);

		DPRINTF("tso buf len=%d\n", mlen);

		tagged = m0->m_flags & M_VLANTAG;
		if (!tagged)
			min_size -= ETHER_VLAN_ENCAP_LEN;

		if (__predict_false(mlen < min_size)) {
			printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
			    m0, mlen, m0->m_pkthdr.tso_segsz,
			    m0->m_pkthdr.csum_flags, m0->m_flags);
			panic("tx tso packet too small");
		}

		/* Make sure that ether, ip, tcp headers are all in m0 */
		if (__predict_false(m0->m_len < min_size)) {
			m0 = m_pullup(m0, min_size);
			if (__predict_false(m0 == NULL)) {
				/* XXX panic probably an overreaction */
				panic("couldn't fit header into mbuf");
			}
		}
		pkthdr = m0->m_data;

		if (tagged) {
			eth_type = CPL_ETH_II_VLAN;
			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
			    ETHER_VLAN_ENCAP_LEN);
		} else {
			eth_type = CPL_ETH_II;
			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
		}
		tcp = (struct tcphdr *)((uint8_t *)ip + sizeof(*ip));

		tso_info |= V_LSO_ETH_TYPE(eth_type) |
		    V_LSO_IPHDR_WORDS(ip->ip_hl) |
		    V_LSO_TCPHDR_WORDS(tcp->th_off);
		hdr->lso_info = htonl(tso_info);

		if (__predict_false(mlen <= PIO_LEN)) {
			/* pkt not undersized but fits in PIO_LEN
			 * Indicates a TSO bug at the higher levels.
			 */
			DPRINTF("**5592 Fix** mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
			    m0, mlen, m0->m_pkthdr.tso_segsz, m0->m_pkthdr.csum_flags, m0->m_flags);
			txsd->m = NULL;
			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
			flits = (mlen + 7) / 8 + 3;
			wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
			    V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
			    F_WR_SOP | F_WR_EOP | txqs.compl);
			wr_lo = htonl(V_WR_LEN(flits) |
			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
			set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
			wmb();
			wr_gen2(txd, txqs.gen);
			check_ring_tx_db(sc, txq);
			return (0);
		}
		flits = 3;
	} else {
		/* plain CPL_TX_PKT, with PIO fast path for small packets */
		struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;

		GET_VTAG(cntrl, m0);
		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
		if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
			cntrl |= F_TXPKT_IPCSUM_DIS;
		if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
			cntrl |= F_TXPKT_L4CSUM_DIS;
		cpl->cntrl = htonl(cntrl);
		cpl->len = htonl(mlen | 0x80000000);

		if (mlen <= PIO_LEN) {
			txsd->m = NULL;
			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
			flits = (mlen + 7) / 8 + 2;

			wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
			    V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
			    F_WR_SOP | F_WR_EOP | txqs.compl);
			wr_lo = htonl(V_WR_LEN(flits) |
			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
			set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
			wmb();
			wr_gen2(txd, txqs.gen);
			check_ring_tx_db(sc, txq);
			return (0);
		}
		flits = 2;
	}
	/* large packet: append the SGL (inline if it fits one descriptor) */
	wrp = (struct work_request_hdr *)txd;
	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
	make_sgl(sgp, segs, nsegs);

	sgl_flits = sgl_len(nsegs);

	KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
	wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
	wr_lo = htonl(V_WR_TID(txq->token));
	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
	    sgl_flits, wr_hi, wr_lo);
	check_ring_tx_db(pi->adapter, txq);

	return (0);
}

/*
 * Periodic (hz/4) Tx watchdog: re-evaluates whether the queue set should
 * be coalescing, and flushes any packets stranded in the buf ring.
 */
void
cxgb_tx_watchdog(void *arg)
{
	struct sge_qset *qs = arg;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];

	if (qs->coalescing != 0 &&
	    (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
	    TXQ_RING_EMPTY(qs))
		qs->coalescing = 0;
	else if (qs->coalescing == 0 &&
	    (txq->in_use >= cxgb_tx_coalesce_enable_start))
		qs->coalescing = 1;
	if (TXQ_TRYLOCK(qs)) {
		qs->qs_flags |= QS_FLUSHING;
		cxgb_start_locked(qs);
		qs->qs_flags &= ~QS_FLUSHING;
		TXQ_UNLOCK(qs);
	}
	if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
		callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
		    qs, txq->txq_watchdog.c_cpu);
}

/*
 * One-shot deferred transmit: drains packets that were enqueued while
 * the hardware queue was busy or the qset was coalescing.
 */
static void
cxgb_tx_timeout(void *arg)
{
	struct sge_qset *qs = arg;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];

	/* start coalescing once the ring is more than 1/8 full */
	if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
		qs->coalescing = 1;
	if (TXQ_TRYLOCK(qs)) {
		qs->qs_flags |= QS_TIMEOUT;
		cxgb_start_locked(qs);
		qs->qs_flags &= ~QS_TIMEOUT;
		TXQ_UNLOCK(qs);
	}
}

/*
 * Main transmit loop for a queue set: dequeues packets from the buf
 * ring and hands them to t3_encap() until the hardware ring fills, the
 * ring empties, or the link/interface goes down.  Called with the qset
 * lock held.
 */
static void
cxgb_start_locked(struct sge_qset *qs)
{
	struct mbuf *m_head = NULL;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];
	int avail, txmax;
	int in_use_init = txq->in_use;
	struct port_info *pi = qs->port;
	struct ifnet *ifp = pi->ifp;

	/* keep 4 descriptors of headroom in the hardware ring */
	avail = txq->size - txq->in_use - 4;
	txmax = min(TX_START_MAX_DESC, avail);

	if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
		reclaim_completed_tx(qs, 0, TXQ_ETH);

	if (!pi->link_config.link_ok) {
		/* link down: drop everything queued in the ring */
		TXQ_RING_FLUSH(qs);
		return;
	}
	TXQ_LOCK_ASSERT(qs);
	while ((txq->in_use - in_use_init < txmax) &&
	    !TXQ_RING_EMPTY(qs) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
	    pi->link_config.link_ok) {
		reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);

		if ((m_head = cxgb_dequeue(qs)) == NULL)
			break;
		/*
		 * Encapsulation can modify our pointer, and or make it
		 * NULL on failure.  In that event, we can't requeue.
		 */
		if (t3_encap(qs, &m_head) || m_head == NULL)
			break;

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/*
		 * We sent via PIO, no longer need a copy
		 */
		if (m_head->m_nextpkt == NULL &&
		    m_head->m_pkthdr.len <= PIO_LEN)
			m_freem(m_head);

		m_head = NULL;
	}
	/* still backlogged: arm the deferred-transmit timeout */
	if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
	    pi->link_config.link_ok)
		callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
		    qs, txq->txq_timer.c_cpu);
	if (m_head != NULL)
		m_freem(m_head);
}

/*
 * Transmit entry point with the qset lock held: tries a direct (no
 * buf-ring) transmit when possible, otherwise enqueues the packet and
 * kicks the transmit loop or timeout as appropriate.
 */
static int
cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
{
	struct port_info *pi = qs->port;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];
	struct buf_ring *br = txq->txq_mr;
	int error, avail;

	avail = txq->size - txq->in_use;
	TXQ_LOCK_ASSERT(qs);

	/*
	 * We can only do a direct transmit if the following are true:
	 * - we aren't coalescing (ring < 3/4 full)
	 * - the link is up -- checked in caller
	 * - there are no packets enqueued already
	 * - there is space in hardware transmit queue
	 */
	if (check_pkt_coalesce(qs) == 0 &&
	    TXQ_RING_EMPTY(qs) && avail > 4) {
		if (t3_encap(qs, &m)) {
			/* encap failed but left the mbuf intact: requeue it */
			if (m != NULL &&
			    (error = drbr_enqueue(ifp, br, m)) != 0)
				return (error);
		} else {
			/*
			 * We've bypassed the buf ring so we 
need to update
			 * the stats directly
			 */
			txq->txq_direct_packets++;
			txq->txq_direct_bytes += m->m_pkthdr.len;
			/*
			** Send a copy of the frame to the BPF
			** listener and set the watchdog on.
			*/
			ETHER_BPF_MTAP(ifp, m);
			/*
			 * We sent via PIO, no longer need a copy
			 */
			if (m->m_pkthdr.len <= PIO_LEN)
				m_freem(m);

		}
	} else if ((error = drbr_enqueue(ifp, br, m)) != 0)
		return (error);

	reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
	if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
	    (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
		cxgb_start_locked(qs);
	else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
		callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
		    qs, txq->txq_timer.c_cpu);
	return (0);
}

/*
 * if_transmit entry point: picks a queue set (by flow id when present),
 * transmits directly if the qset lock is free, otherwise defers the
 * packet to that qset's buf ring.
 */
int
cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct sge_qset *qs;
	struct port_info *pi = ifp->if_softc;
	int error, qidx = pi->first_qset;

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0
	    ||(!pi->link_config.link_ok)) {
		/* interface down or link down: silently drop */
		m_freem(m);
		return (0);
	}

	if (m->m_flags & M_FLOWID)
		qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;

	qs = &pi->adapter->sge.qs[qidx];

	if (TXQ_TRYLOCK(qs)) {
		/* XXX running */
		error = cxgb_transmit_locked(ifp, qs, m);
		TXQ_UNLOCK(qs);
	} else
		error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
	return (error);
}

/*
 * Legacy if_start entry point: drains the port's first queue set.
 */
void
cxgb_start(struct ifnet *ifp)
{
	struct port_info *pi = ifp->if_softc;
	struct sge_qset *qs = &pi->adapter->sge.qs[pi->first_qset];

	if (!pi->link_config.link_ok)
		return;

	TXQ_LOCK(qs);
	cxgb_start_locked(qs);
	TXQ_UNLOCK(qs);
}

void
cxgb_qflush(struct ifnet *ifp)
{
	/*
	 * flush any enqueued mbufs in the buf_rings
	 * and in the transmit queues
	 * no-op for now
	 */
	return;
}

/**
 * write_imm - write a packet into a Tx descriptor as immediate data
 * @d: the Tx descriptor to write
 * @m: the packet
 * @len: the length of packet data to write as immediate data
 * @gen: the generation bit value to write
 *
 * Writes a packet as immediate data into a Tx descriptor.  The packet
 * contains a work request at its beginning.  We must write the packet
 * carefully so the SGE doesn't read accidentally before it's written in
 * its entirety.
 */
static __inline void
write_imm(struct tx_desc *d, struct mbuf *m,
    unsigned int len, unsigned int gen)
{
	struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
	struct work_request_hdr *to = (struct work_request_hdr *)d;
	uint32_t wr_hi, wr_lo;

	if (len > WR_LEN)
		panic("len too big %d\n", len);
	if (len < sizeof(*from))
		panic("len too small %d", len);

	/* copy the payload first, then publish the header (below wmb) */
	memcpy(&to[1], &from[1], len - sizeof(*from));
	wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
	    V_WR_BCNTLFLT(len & 7));
	wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) |
	    V_WR_LEN((len + 7) / 8));
	set_wr_hdr(to, wr_hi, wr_lo);
	wmb();
	wr_gen2(d, gen);

	/*
	 * This check is a hack we should really fix the logic so
	 * that this can't happen
	 */
	if (m->m_type != MT_DONTFREE)
		m_freem(m);

}

/**
 * check_desc_avail - check descriptor availability on a send queue
 * @adap: the adapter
 * @q: the TX queue
 * @m: the packet needing the descriptors
 * @ndesc: the number of Tx descriptors needed
 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 *
 * Checks if the requested number of Tx descriptors is available on an
 * SGE send queue.  If the queue is already suspended or not enough
 * descriptors are available the packet is queued for later transmission.
 * Must be called with the Tx queue locked.
 *
 * Returns 0 if enough descriptors are available, 1 if there aren't
 * enough descriptors and the packet has been queued, and 2 if the caller
 * needs to retry because there weren't enough descriptors at the
 * beginning of the call but some freed up in the mean time.
 */
static __inline int
check_desc_avail(adapter_t *adap, struct sge_txq *q,
    struct mbuf *m, unsigned int ndesc,
    unsigned int qid)
{
	/*
	 * XXX We currently only use this for checking the control queue
	 * the control queue is only used for binding qsets which happens
	 * at init time so we are guaranteed enough descriptors
	 */
	if (__predict_false(!mbufq_empty(&q->sendq))) {
addq_exit:	mbufq_tail(&q->sendq, m);
		return 1;
	}
	if (__predict_false(q->size - q->in_use < ndesc)) {

		struct sge_qset *qs = txq_to_qset(q, qid);

		/* mark stopped, then re-check for a race with the
		 * reclaimer before committing to queueing */
		setbit(&qs->txq_stopped, qid);
		if (should_restart_tx(q) &&
		    test_and_clear_bit(qid, &qs->txq_stopped))
			return 2;

		q->stops++;
		goto addq_exit;
	}
	return 0;
}


/**
 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 * @q: the SGE control Tx queue
 *
 * This is a variant of reclaim_completed_tx() that is used for Tx queues
 * that send only immediate data (presently just the control queues) and
 * thus do not have any mbufs to free.
 */
static __inline void
reclaim_completed_tx_imm(struct sge_txq *q)
{
	unsigned int reclaim = q->processed - q->cleaned;

	q->in_use -= reclaim;
	q->cleaned += reclaim;
}

/*
 * Returns non-zero if the mbuf's data fits entirely as immediate data
 * in a single work request.
 */
static __inline int
immediate(const struct mbuf *m)
{
	return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN;
}

/**
 * ctrl_xmit - send a packet through an SGE control Tx queue
 * @adap: the adapter
 * @q: the control queue
 * @m: the packet
 *
 * Send a packet through an SGE control Tx queue.  Packets sent through
 * a control queue must fit entirely as immediate data in a single Tx
 * descriptor and have no page fragments.
 */
static int
ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
{
	int ret;
	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
	struct sge_txq *q = &qs->txq[TXQ_CTRL];

	if (__predict_false(!immediate(m))) {
		/* control WRs must be immediate-only; drop otherwise */
		m_freem(m);
		return 0;
	}

	wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
	wrp->wrh_lo = htonl(V_WR_TID(q->token));

	TXQ_LOCK(qs);
again:	reclaim_completed_tx_imm(q);

	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
	if (__predict_false(ret)) {
		if (ret == 1) {
			TXQ_UNLOCK(qs);
			log(LOG_ERR, "no desc available\n");
			return (ENOSPC);
		}
		/* ret == 2: descriptors freed up meanwhile, retry */
		goto again;
	}
	write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

	q->in_use++;
	if (++q->pidx >= q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	TXQ_UNLOCK(qs);
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	return (0);
}


/**
 * restart_ctrlq - restart a suspended control queue
 * @qs: the queue set containing the control queue
 *
 * Resumes transmission on a suspended Tx control queue.
 */
static void
restart_ctrlq(void *data, int npending)
{
	struct mbuf *m;
	struct sge_qset *qs = (struct sge_qset *)data;
	struct sge_txq *q = &qs->txq[TXQ_CTRL];
	adapter_t *adap = qs->port->adapter;

	log(LOG_WARNING, "Restart_ctrlq in_use=%d\n", q->in_use);

	TXQ_LOCK(qs);
again:	reclaim_completed_tx_imm(q);

	/* drain the backlog queued by check_desc_avail() */
	while (q->in_use < q->size &&
	    (m = mbufq_dequeue(&q->sendq)) != NULL) {

		write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

		if (++q->pidx >= q->size) {
			q->pidx = 0;
			q->gen ^= 1;
		}
		q->in_use++;
	}
	if (!mbufq_empty(&q->sendq)) {
		/* still backlogged: mark stopped, but re-check for a
		 * race with the reclaimer before giving up */
		setbit(&qs->txq_stopped, TXQ_CTRL);

		if (should_restart_tx(q) &&
		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
			goto again;
		q->stops++;
	}
	TXQ_UNLOCK(qs);
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}


/*
 * Send a management message through control queue 0
 */
int
t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
{
	return ctrl_xmit(adap, &adap->sge.qs[0], m);
}

/**
 * free_qset - free the resources of an SGE queue set
 * @sc: the controller owning the queue set
 * @q: the queue set
 *
 * Release the HW and SW resources associated with an SGE queue set, such
 * as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 * queue set must be quiesced prior to calling this.
 */
static void
t3_free_qset(adapter_t *sc, struct sge_qset *q)
{
	int i;

	reclaim_completed_tx(q, 0, TXQ_ETH);
	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
		if (q->txq[i].txq_mr != NULL)
			buf_ring_free(q->txq[i].txq_mr, M_DEVBUF);
		if (q->txq[i].txq_ifq != NULL) {
			ifq_delete(q->txq[i].txq_ifq);
			free(q->txq[i].txq_ifq, M_DEVBUF);
		}
	}

	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
		if (q->fl[i].desc) {
			/* disable the FL context under the registers lock
			 * before tearing down its DMA resources */
			mtx_lock_spin(&sc->sge.reg_lock);
			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
			mtx_unlock_spin(&sc->sge.reg_lock);
			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
			    q->fl[i].desc_map);
			bus_dma_tag_destroy(q->fl[i].desc_tag);
			bus_dma_tag_destroy(q->fl[i].entry_tag);
		}
		if (q->fl[i].sdesc) {
			free_rx_bufs(sc, &q->fl[i]);
			free(q->fl[i].sdesc, M_DEVBUF);
		}
	}

	/* NOTE(review): q->lock is acquired by the caller
	 * (t3_free_sge_resources) and released here before destruction. */
	mtx_unlock(&q->lock);
	MTX_DESTROY(&q->lock);
	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
		if (q->txq[i].desc) {
			mtx_lock_spin(&sc->sge.reg_lock);
			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
			mtx_unlock_spin(&sc->sge.reg_lock);
			bus_dmamap_unload(q->txq[i].desc_tag,
			    q->txq[i].desc_map);
			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
			    q->txq[i].desc_map);
			bus_dma_tag_destroy(q->txq[i].desc_tag);
			bus_dma_tag_destroy(q->txq[i].entry_tag);
		}
		if (q->txq[i].sdesc) {
			free(q->txq[i].sdesc, M_DEVBUF);
		}
	}

	if (q->rspq.desc) {
		mtx_lock_spin(&sc->sge.reg_lock);
		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
		mtx_unlock_spin(&sc->sge.reg_lock);

		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
		    q->rspq.desc_map);
		bus_dma_tag_destroy(q->rspq.desc_tag);
		MTX_DESTROY(&q->rspq.lock);
	}

#ifdef LRO_SUPPORTED
	tcp_lro_free(&q->lro.ctrl);
#endif

	bzero(q, sizeof(*q));
}

/**
 * t3_free_sge_resources - free SGE resources
 * @sc: the adapter softc
 *
 * Frees resources used by the SGE queue sets.
 */
void
t3_free_sge_resources(adapter_t *sc)
{
	int i, nqsets;

	for (nqsets = i = 0; i < (sc)->params.nports; i++)
		nqsets += sc->port[i].nqsets;

	for (i = 0; i < nqsets; ++i) {
		/* the qset lock is released (and destroyed) inside
		 * t3_free_qset */
		TXQ_LOCK(&sc->sge.qs[i]);
		t3_free_qset(sc, &sc->sge.qs[i]);
	}

}

/**
 * t3_sge_start - enable SGE
 * @sc: the controller softc
 *
 * Enables the SGE for DMAs.  This is the last step in starting packet
 * transfers.
 */
void
t3_sge_start(adapter_t *sc)
{
	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
}

/**
 * t3_sge_stop - disable SGE operation
 * @sc: the adapter
 *
 * Disables the DMA engine.  This can be called in emergencies (e.g.,
 * from error interrupts) or from normal process context.  In the latter
 * case it also disables any pending queue restart tasklets.  Note that
 * if it is called in interrupt context it cannot disable the restart
 * tasklets as it cannot wait, however the tasklets will have no effect
 * since the doorbells are disabled and the driver will call this again
 * later from process context, at which time the tasklets will be stopped
 * if they are still running.
2142 */ 2143void 2144t3_sge_stop(adapter_t *sc) 2145{ 2146 int i, nqsets; 2147 2148 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0); 2149 2150 if (sc->tq == NULL) 2151 return; 2152 2153 for (nqsets = i = 0; i < (sc)->params.nports; i++) 2154 nqsets += sc->port[i].nqsets; 2155#ifdef notyet 2156 /* 2157 * 2158 * XXX 2159 */ 2160 for (i = 0; i < nqsets; ++i) { 2161 struct sge_qset *qs = &sc->sge.qs[i]; 2162 2163 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2164 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2165 } 2166#endif 2167} 2168 2169/** 2170 * t3_free_tx_desc - reclaims Tx descriptors and their buffers 2171 * @adapter: the adapter 2172 * @q: the Tx queue to reclaim descriptors from 2173 * @reclaimable: the number of descriptors to reclaim 2174 * @m_vec_size: maximum number of buffers to reclaim 2175 * @desc_reclaimed: returns the number of descriptors reclaimed 2176 * 2177 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated 2178 * Tx buffers. Called with the Tx queue lock held. 
2179 * 2180 * Returns number of buffers of reclaimed 2181 */ 2182void 2183t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue) 2184{ 2185 struct tx_sw_desc *txsd; 2186 unsigned int cidx, mask; 2187 struct sge_txq *q = &qs->txq[queue]; 2188 2189#ifdef T3_TRACE 2190 T3_TRACE2(sc->tb[q->cntxt_id & 7], 2191 "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx); 2192#endif 2193 cidx = q->cidx; 2194 mask = q->size - 1; 2195 txsd = &q->sdesc[cidx]; 2196 2197 mtx_assert(&qs->lock, MA_OWNED); 2198 while (reclaimable--) { 2199 prefetch(q->sdesc[(cidx + 1) & mask].m); 2200 prefetch(q->sdesc[(cidx + 2) & mask].m); 2201 2202 if (txsd->m != NULL) { 2203 if (txsd->flags & TX_SW_DESC_MAPPED) { 2204 bus_dmamap_unload(q->entry_tag, txsd->map); 2205 txsd->flags &= ~TX_SW_DESC_MAPPED; 2206 } 2207 m_freem_list(txsd->m); 2208 txsd->m = NULL; 2209 } else 2210 q->txq_skipped++; 2211 2212 ++txsd; 2213 if (++cidx == q->size) { 2214 cidx = 0; 2215 txsd = q->sdesc; 2216 } 2217 } 2218 q->cidx = cidx; 2219 2220} 2221 2222/** 2223 * is_new_response - check if a response is newly written 2224 * @r: the response descriptor 2225 * @q: the response queue 2226 * 2227 * Returns true if a response descriptor contains a yet unprocessed 2228 * response. 2229 */ 2230static __inline int 2231is_new_response(const struct rsp_desc *r, 2232 const struct sge_rspq *q) 2233{ 2234 return (r->intr_gen & F_RSPD_GEN2) == q->gen; 2235} 2236 2237#define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) 2238#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ 2239 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ 2240 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ 2241 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) 2242 2243/* How long to delay the next interrupt in case of memory shortage, in 0.1us. 
*/ 2244#define NOMEM_INTR_DELAY 2500 2245 2246/** 2247 * write_ofld_wr - write an offload work request 2248 * @adap: the adapter 2249 * @m: the packet to send 2250 * @q: the Tx queue 2251 * @pidx: index of the first Tx descriptor to write 2252 * @gen: the generation value to use 2253 * @ndesc: number of descriptors the packet will occupy 2254 * 2255 * Write an offload work request to send the supplied packet. The packet 2256 * data already carry the work request with most fields populated. 2257 */ 2258static void 2259write_ofld_wr(adapter_t *adap, struct mbuf *m, 2260 struct sge_txq *q, unsigned int pidx, 2261 unsigned int gen, unsigned int ndesc, 2262 bus_dma_segment_t *segs, unsigned int nsegs) 2263{ 2264 unsigned int sgl_flits, flits; 2265 struct work_request_hdr *from; 2266 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; 2267 struct tx_desc *d = &q->desc[pidx]; 2268 struct txq_state txqs; 2269 2270 if (immediate(m) && nsegs == 0) { 2271 write_imm(d, m, m->m_len, gen); 2272 return; 2273 } 2274 2275 /* Only TX_DATA builds SGLs */ 2276 from = mtod(m, struct work_request_hdr *); 2277 memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from)); 2278 2279 flits = m->m_len / 8; 2280 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl; 2281 2282 make_sgl(sgp, segs, nsegs); 2283 sgl_flits = sgl_len(nsegs); 2284 2285 txqs.gen = gen; 2286 txqs.pidx = pidx; 2287 txqs.compl = 0; 2288 2289 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits, 2290 from->wrh_hi, from->wrh_lo); 2291} 2292 2293/** 2294 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet 2295 * @m: the packet 2296 * 2297 * Returns the number of Tx descriptors needed for the given offload 2298 * packet. These packets are already fully constructed. 
2299 */ 2300static __inline unsigned int 2301calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs) 2302{ 2303 unsigned int flits, cnt = 0; 2304 int ndescs; 2305 2306 if (m->m_len <= WR_LEN && nsegs == 0) 2307 return (1); /* packet fits as immediate data */ 2308 2309 /* 2310 * This needs to be re-visited for TOE 2311 */ 2312 2313 cnt = nsegs; 2314 2315 /* headers */ 2316 flits = m->m_len / 8; 2317 2318 ndescs = flits_to_desc(flits + sgl_len(cnt)); 2319 2320 return (ndescs); 2321} 2322 2323/** 2324 * ofld_xmit - send a packet through an offload queue 2325 * @adap: the adapter 2326 * @q: the Tx offload queue 2327 * @m: the packet 2328 * 2329 * Send an offload packet through an SGE offload queue. 2330 */ 2331static int 2332ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) 2333{ 2334 int ret, nsegs; 2335 unsigned int ndesc; 2336 unsigned int pidx, gen; 2337 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2338 bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs; 2339 struct tx_sw_desc *stx; 2340 2341 nsegs = m_get_sgllen(m); 2342 vsegs = m_get_sgl(m); 2343 ndesc = calc_tx_descs_ofld(m, nsegs); 2344 busdma_map_sgl(vsegs, segs, nsegs); 2345 2346 stx = &q->sdesc[q->pidx]; 2347 2348 TXQ_LOCK(qs); 2349again: reclaim_completed_tx(qs, 16, TXQ_OFLD); 2350 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD); 2351 if (__predict_false(ret)) { 2352 if (ret == 1) { 2353 printf("no ofld desc avail\n"); 2354 2355 m_set_priority(m, ndesc); /* save for restart */ 2356 TXQ_UNLOCK(qs); 2357 return (EINTR); 2358 } 2359 goto again; 2360 } 2361 2362 gen = q->gen; 2363 q->in_use += ndesc; 2364 pidx = q->pidx; 2365 q->pidx += ndesc; 2366 if (q->pidx >= q->size) { 2367 q->pidx -= q->size; 2368 q->gen ^= 1; 2369 } 2370#ifdef T3_TRACE 2371 T3_TRACE5(adap->tb[q->cntxt_id & 7], 2372 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u", 2373 ndesc, pidx, skb->len, skb->len - skb->data_len, 2374 skb_shinfo(skb)->nr_frags); 2375#endif 2376 TXQ_UNLOCK(qs); 2377 2378 write_ofld_wr(adap, m, q, pidx, 
gen, ndesc, segs, nsegs); 2379 check_ring_tx_db(adap, q); 2380 return (0); 2381} 2382 2383/** 2384 * restart_offloadq - restart a suspended offload queue 2385 * @qs: the queue set cotaining the offload queue 2386 * 2387 * Resumes transmission on a suspended Tx offload queue. 2388 */ 2389static void 2390restart_offloadq(void *data, int npending) 2391{ 2392 struct mbuf *m; 2393 struct sge_qset *qs = data; 2394 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2395 adapter_t *adap = qs->port->adapter; 2396 bus_dma_segment_t segs[TX_MAX_SEGS]; 2397 struct tx_sw_desc *stx = &q->sdesc[q->pidx]; 2398 int nsegs, cleaned; 2399 2400 TXQ_LOCK(qs); 2401again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD); 2402 2403 while ((m = mbufq_peek(&q->sendq)) != NULL) { 2404 unsigned int gen, pidx; 2405 unsigned int ndesc = m_get_priority(m); 2406 2407 if (__predict_false(q->size - q->in_use < ndesc)) { 2408 setbit(&qs->txq_stopped, TXQ_OFLD); 2409 if (should_restart_tx(q) && 2410 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) 2411 goto again; 2412 q->stops++; 2413 break; 2414 } 2415 2416 gen = q->gen; 2417 q->in_use += ndesc; 2418 pidx = q->pidx; 2419 q->pidx += ndesc; 2420 if (q->pidx >= q->size) { 2421 q->pidx -= q->size; 2422 q->gen ^= 1; 2423 } 2424 2425 (void)mbufq_dequeue(&q->sendq); 2426 busdma_map_mbufs(&m, q, stx, segs, &nsegs); 2427 TXQ_UNLOCK(qs); 2428 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 2429 TXQ_LOCK(qs); 2430 } 2431#if USE_GTS 2432 set_bit(TXQ_RUNNING, &q->flags); 2433 set_bit(TXQ_LAST_PKT_DB, &q->flags); 2434#endif 2435 TXQ_UNLOCK(qs); 2436 wmb(); 2437 t3_write_reg(adap, A_SG_KDOORBELL, 2438 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 2439} 2440 2441/** 2442 * queue_set - return the queue set a packet should use 2443 * @m: the packet 2444 * 2445 * Maps a packet to the SGE queue set it should use. The desired queue 2446 * set is carried in bits 1-3 in the packet's priority. 
2447 */ 2448static __inline int 2449queue_set(const struct mbuf *m) 2450{ 2451 return m_get_priority(m) >> 1; 2452} 2453 2454/** 2455 * is_ctrl_pkt - return whether an offload packet is a control packet 2456 * @m: the packet 2457 * 2458 * Determines whether an offload packet should use an OFLD or a CTRL 2459 * Tx queue. This is indicated by bit 0 in the packet's priority. 2460 */ 2461static __inline int 2462is_ctrl_pkt(const struct mbuf *m) 2463{ 2464 return m_get_priority(m) & 1; 2465} 2466 2467/** 2468 * t3_offload_tx - send an offload packet 2469 * @tdev: the offload device to send to 2470 * @m: the packet 2471 * 2472 * Sends an offload packet. We use the packet priority to select the 2473 * appropriate Tx queue as follows: bit 0 indicates whether the packet 2474 * should be sent as regular or control, bits 1-3 select the queue set. 2475 */ 2476int 2477t3_offload_tx(struct t3cdev *tdev, struct mbuf *m) 2478{ 2479 adapter_t *adap = tdev2adap(tdev); 2480 struct sge_qset *qs = &adap->sge.qs[queue_set(m)]; 2481 2482 if (__predict_false(is_ctrl_pkt(m))) 2483 return ctrl_xmit(adap, qs, m); 2484 2485 return ofld_xmit(adap, qs, m); 2486} 2487 2488/** 2489 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts 2490 * @tdev: the offload device that will be receiving the packets 2491 * @q: the SGE response queue that assembled the bundle 2492 * @m: the partial bundle 2493 * @n: the number of packets in the bundle 2494 * 2495 * Delivers a (partial) bundle of Rx offload packets to an offload device. 
2496 */ 2497static __inline void 2498deliver_partial_bundle(struct t3cdev *tdev, 2499 struct sge_rspq *q, 2500 struct mbuf *mbufs[], int n) 2501{ 2502 if (n) { 2503 q->offload_bundles++; 2504 cxgb_ofld_recv(tdev, mbufs, n); 2505 } 2506} 2507 2508static __inline int 2509rx_offload(struct t3cdev *tdev, struct sge_rspq *rq, 2510 struct mbuf *m, struct mbuf *rx_gather[], 2511 unsigned int gather_idx) 2512{ 2513 2514 rq->offload_pkts++; 2515 m->m_pkthdr.header = mtod(m, void *); 2516 rx_gather[gather_idx++] = m; 2517 if (gather_idx == RX_BUNDLE_SIZE) { 2518 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE); 2519 gather_idx = 0; 2520 rq->offload_bundles++; 2521 } 2522 return (gather_idx); 2523} 2524 2525static void 2526restart_tx(struct sge_qset *qs) 2527{ 2528 struct adapter *sc = qs->port->adapter; 2529 2530 2531 if (isset(&qs->txq_stopped, TXQ_OFLD) && 2532 should_restart_tx(&qs->txq[TXQ_OFLD]) && 2533 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { 2534 qs->txq[TXQ_OFLD].restarts++; 2535 DPRINTF("restarting TXQ_OFLD\n"); 2536 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2537 } 2538 DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n", 2539 qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]), 2540 qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned, 2541 qs->txq[TXQ_CTRL].in_use); 2542 2543 if (isset(&qs->txq_stopped, TXQ_CTRL) && 2544 should_restart_tx(&qs->txq[TXQ_CTRL]) && 2545 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { 2546 qs->txq[TXQ_CTRL].restarts++; 2547 DPRINTF("restarting TXQ_CTRL\n"); 2548 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2549 } 2550} 2551 2552/** 2553 * t3_sge_alloc_qset - initialize an SGE queue set 2554 * @sc: the controller softc 2555 * @id: the queue set id 2556 * @nports: how many Ethernet ports will be using this queue set 2557 * @irq_vec_idx: the IRQ vector index for response queue interrupts 2558 * @p: configuration parameters for this queue set 2559 * @ntxq: number of Tx 
queues for the queue set 2560 * @pi: port info for queue set 2561 * 2562 * Allocate resources and initialize an SGE queue set. A queue set 2563 * comprises a response queue, two Rx free-buffer queues, and up to 3 2564 * Tx queues. The Tx queues are assigned roles in the order Ethernet 2565 * queue, offload queue, and control queue. 2566 */ 2567int 2568t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 2569 const struct qset_params *p, int ntxq, struct port_info *pi) 2570{ 2571 struct sge_qset *q = &sc->sge.qs[id]; 2572 int i, ret = 0; 2573 2574 MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF); 2575 q->port = pi; 2576 2577 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 2578 2579 if ((q->txq[i].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size, 2580 M_DEVBUF, M_WAITOK, &q->lock)) == NULL) { 2581 device_printf(sc->dev, "failed to allocate mbuf ring\n"); 2582 goto err; 2583 } 2584 if ((q->txq[i].txq_ifq = 2585 malloc(sizeof(struct ifaltq), M_DEVBUF, M_NOWAIT|M_ZERO)) 2586 == NULL) { 2587 device_printf(sc->dev, "failed to allocate ifq\n"); 2588 goto err; 2589 } 2590 ifq_init(q->txq[i].txq_ifq, pi->ifp); 2591 callout_init(&q->txq[i].txq_timer, 1); 2592 callout_init(&q->txq[i].txq_watchdog, 1); 2593 q->txq[i].txq_timer.c_cpu = id % mp_ncpus; 2594 q->txq[i].txq_watchdog.c_cpu = id % mp_ncpus; 2595 } 2596 init_qset_cntxt(q, id); 2597 q->idx = id; 2598 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 2599 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 2600 &q->fl[0].desc, &q->fl[0].sdesc, 2601 &q->fl[0].desc_tag, &q->fl[0].desc_map, 2602 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 2603 printf("error %d from alloc ring fl0\n", ret); 2604 goto err; 2605 } 2606 2607 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 2608 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 2609 &q->fl[1].desc, &q->fl[1].sdesc, 2610 &q->fl[1].desc_tag, &q->fl[1].desc_map, 2611 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 2612 printf("error %d from alloc ring 
fl1\n", ret); 2613 goto err; 2614 } 2615 2616 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 2617 &q->rspq.phys_addr, &q->rspq.desc, NULL, 2618 &q->rspq.desc_tag, &q->rspq.desc_map, 2619 NULL, NULL)) != 0) { 2620 printf("error %d from alloc ring rspq\n", ret); 2621 goto err; 2622 } 2623 2624 for (i = 0; i < ntxq; ++i) { 2625 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc); 2626 2627 if ((ret = alloc_ring(sc, p->txq_size[i], 2628 sizeof(struct tx_desc), sz, 2629 &q->txq[i].phys_addr, &q->txq[i].desc, 2630 &q->txq[i].sdesc, &q->txq[i].desc_tag, 2631 &q->txq[i].desc_map, 2632 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 2633 printf("error %d from alloc ring tx %i\n", ret, i); 2634 goto err; 2635 } 2636 mbufq_init(&q->txq[i].sendq); 2637 q->txq[i].gen = 1; 2638 q->txq[i].size = p->txq_size[i]; 2639 } 2640 2641 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); 2642 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); 2643 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2644 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2645 2646 q->fl[0].gen = q->fl[1].gen = 1; 2647 q->fl[0].size = p->fl_size; 2648 q->fl[1].size = p->jumbo_size; 2649 2650 q->rspq.gen = 1; 2651 q->rspq.cidx = 0; 2652 q->rspq.size = p->rspq_size; 2653 2654 q->txq[TXQ_ETH].stop_thres = nports * 2655 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 2656 2657 q->fl[0].buf_size = MCLBYTES; 2658 q->fl[0].zone = zone_pack; 2659 q->fl[0].type = EXT_PACKET; 2660#if __FreeBSD_version > 800000 2661 if (cxgb_use_16k_clusters) { 2662 q->fl[1].buf_size = MJUM16BYTES; 2663 q->fl[1].zone = zone_jumbo16; 2664 q->fl[1].type = EXT_JUMBO16; 2665 } else { 2666 q->fl[1].buf_size = MJUM9BYTES; 2667 q->fl[1].zone = zone_jumbo9; 2668 q->fl[1].type = EXT_JUMBO9; 2669 } 2670#else 2671 q->fl[1].buf_size = MJUMPAGESIZE; 2672 q->fl[1].zone = zone_jumbop; 2673 q->fl[1].type = EXT_JUMBOP; 2674#endif 2675 2676#ifdef LRO_SUPPORTED 
2677 /* Allocate and setup the lro_ctrl structure */ 2678 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO); 2679 ret = tcp_lro_init(&q->lro.ctrl); 2680 if (ret) { 2681 printf("error %d from tcp_lro_init\n", ret); 2682 goto err; 2683 } 2684 q->lro.ctrl.ifp = pi->ifp; 2685#endif 2686 2687 mtx_lock_spin(&sc->sge.reg_lock); 2688 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 2689 q->rspq.phys_addr, q->rspq.size, 2690 q->fl[0].buf_size, 1, 0); 2691 if (ret) { 2692 printf("error %d from t3_sge_init_rspcntxt\n", ret); 2693 goto err_unlock; 2694 } 2695 2696 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2697 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 2698 q->fl[i].phys_addr, q->fl[i].size, 2699 q->fl[i].buf_size, p->cong_thres, 1, 2700 0); 2701 if (ret) { 2702 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 2703 goto err_unlock; 2704 } 2705 } 2706 2707 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 2708 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 2709 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 2710 1, 0); 2711 if (ret) { 2712 printf("error %d from t3_sge_init_ecntxt\n", ret); 2713 goto err_unlock; 2714 } 2715 2716 if (ntxq > 1) { 2717 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 2718 USE_GTS, SGE_CNTXT_OFLD, id, 2719 q->txq[TXQ_OFLD].phys_addr, 2720 q->txq[TXQ_OFLD].size, 0, 1, 0); 2721 if (ret) { 2722 printf("error %d from t3_sge_init_ecntxt\n", ret); 2723 goto err_unlock; 2724 } 2725 } 2726 2727 if (ntxq > 2) { 2728 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 2729 SGE_CNTXT_CTRL, id, 2730 q->txq[TXQ_CTRL].phys_addr, 2731 q->txq[TXQ_CTRL].size, 2732 q->txq[TXQ_CTRL].token, 1, 0); 2733 if (ret) { 2734 printf("error %d from t3_sge_init_ecntxt\n", ret); 2735 goto err_unlock; 2736 } 2737 } 2738 2739 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d", 2740 device_get_unit(sc->dev), irq_vec_idx); 2741 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF); 2742 2743 
mtx_unlock_spin(&sc->sge.reg_lock); 2744 t3_update_qset_coalesce(q, p); 2745 q->port = pi; 2746 2747 refill_fl(sc, &q->fl[0], q->fl[0].size); 2748 refill_fl(sc, &q->fl[1], q->fl[1].size); 2749 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 2750 2751 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 2752 V_NEWTIMER(q->rspq.holdoff_tmr)); 2753 2754 return (0); 2755 2756err_unlock: 2757 mtx_unlock_spin(&sc->sge.reg_lock); 2758err: 2759 TXQ_LOCK(q); 2760 t3_free_qset(sc, q); 2761 2762 return (ret); 2763} 2764 2765/* 2766 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with 2767 * ethernet data. Hardware assistance with various checksums and any vlan tag 2768 * will also be taken into account here. 2769 */ 2770void 2771t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad) 2772{ 2773 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); 2774 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]]; 2775 struct ifnet *ifp = pi->ifp; 2776 2777 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff); 2778 2779 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment && 2780 cpl->csum_valid && cpl->csum == 0xffff) { 2781 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); 2782 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; 2783 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR); 2784 m->m_pkthdr.csum_data = 0xffff; 2785 } 2786 /* 2787 * XXX need to add VLAN support for 6.x 2788 */ 2789#ifdef VLAN_SUPPORTED 2790 if (__predict_false(cpl->vlan_valid)) { 2791 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 2792 m->m_flags |= M_VLANTAG; 2793 } 2794#endif 2795 2796 m->m_pkthdr.rcvif = ifp; 2797 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad; 2798 /* 2799 * adjust after conversion to mbuf chain 2800 */ 2801 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad); 2802 m->m_len -= (sizeof(*cpl) + ethpad); 2803 m->m_data += 
(sizeof(*cpl) + ethpad); 2804} 2805 2806/** 2807 * get_packet - return the next ingress packet buffer from a free list 2808 * @adap: the adapter that received the packet 2809 * @drop_thres: # of remaining buffers before we start dropping packets 2810 * @qs: the qset that the SGE free list holding the packet belongs to 2811 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain 2812 * @r: response descriptor 2813 * 2814 * Get the next packet from a free list and complete setup of the 2815 * sk_buff. If the packet is small we make a copy and recycle the 2816 * original buffer, otherwise we use the original buffer itself. If a 2817 * positive drop threshold is supplied packets are dropped and their 2818 * buffers recycled if (a) the number of remaining buffers is under the 2819 * threshold and the packet is too big to copy, or (b) the packet should 2820 * be copied but there is no memory for the copy. 2821 */ 2822static int 2823get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 2824 struct t3_mbuf_hdr *mh, struct rsp_desc *r) 2825{ 2826 2827 unsigned int len_cq = ntohl(r->len_cq); 2828 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? 
&qs->fl[1] : &qs->fl[0]; 2829 int mask, cidx = fl->cidx; 2830 struct rx_sw_desc *sd = &fl->sdesc[cidx]; 2831 uint32_t len = G_RSPD_LEN(len_cq); 2832 uint32_t flags = M_EXT; 2833 uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags)); 2834 caddr_t cl; 2835 struct mbuf *m; 2836 int ret = 0; 2837 2838 mask = fl->size - 1; 2839 prefetch(fl->sdesc[(cidx + 1) & mask].m); 2840 prefetch(fl->sdesc[(cidx + 2) & mask].m); 2841 prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl); 2842 prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); 2843 2844 fl->credits--; 2845 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2846 2847 if (recycle_enable && len <= SGE_RX_COPY_THRES && 2848 sopeop == RSPQ_SOP_EOP) { 2849 if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) 2850 goto skip_recycle; 2851 cl = mtod(m, void *); 2852 memcpy(cl, sd->rxsd_cl, len); 2853 recycle_rx_buf(adap, fl, fl->cidx); 2854 m->m_pkthdr.len = m->m_len = len; 2855 m->m_flags = 0; 2856 mh->mh_head = mh->mh_tail = m; 2857 ret = 1; 2858 goto done; 2859 } else { 2860 skip_recycle: 2861 bus_dmamap_unload(fl->entry_tag, sd->map); 2862 cl = sd->rxsd_cl; 2863 m = sd->m; 2864 2865 if ((sopeop == RSPQ_SOP_EOP) || 2866 (sopeop == RSPQ_SOP)) 2867 flags |= M_PKTHDR; 2868 if (fl->zone == zone_pack) { 2869 m_init(m, zone_pack, MCLBYTES, M_NOWAIT, MT_DATA, flags); 2870 /* 2871 * restore clobbered data pointer 2872 */ 2873 m->m_data = m->m_ext.ext_buf; 2874 } else { 2875 m_cljset(m, cl, fl->type); 2876 m->m_flags = flags; 2877 } 2878 m->m_len = len; 2879 } 2880 switch(sopeop) { 2881 case RSPQ_SOP_EOP: 2882 ret = 1; 2883 /* FALLTHROUGH */ 2884 case RSPQ_SOP: 2885 mh->mh_head = mh->mh_tail = m; 2886 m->m_pkthdr.len = len; 2887 break; 2888 case RSPQ_EOP: 2889 ret = 1; 2890 /* FALLTHROUGH */ 2891 case RSPQ_NSOP_NEOP: 2892 if (mh->mh_tail == NULL) { 2893 log(LOG_ERR, "discarding intermediate descriptor entry\n"); 2894 m_freem(m); 2895 break; 2896 } 2897 mh->mh_tail->m_next = m; 2898 mh->mh_tail = m; 2899 mh->mh_head->m_pkthdr.len += len; 
2900 break; 2901 } 2902 if (cxgb_debug) 2903 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len); 2904done: 2905 if (++fl->cidx == fl->size) 2906 fl->cidx = 0; 2907 2908 return (ret); 2909} 2910 2911/** 2912 * handle_rsp_cntrl_info - handles control information in a response 2913 * @qs: the queue set corresponding to the response 2914 * @flags: the response control flags 2915 * 2916 * Handles the control information of an SGE response, such as GTS 2917 * indications and completion credits for the queue set's Tx queues. 2918 * HW coalesces credits, we don't do any extra SW coalescing. 2919 */ 2920static __inline void 2921handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 2922{ 2923 unsigned int credits; 2924 2925#if USE_GTS 2926 if (flags & F_RSPD_TXQ0_GTS) 2927 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 2928#endif 2929 credits = G_RSPD_TXQ0_CR(flags); 2930 if (credits) 2931 qs->txq[TXQ_ETH].processed += credits; 2932 2933 credits = G_RSPD_TXQ2_CR(flags); 2934 if (credits) 2935 qs->txq[TXQ_CTRL].processed += credits; 2936 2937# if USE_GTS 2938 if (flags & F_RSPD_TXQ1_GTS) 2939 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 2940# endif 2941 credits = G_RSPD_TXQ1_CR(flags); 2942 if (credits) 2943 qs->txq[TXQ_OFLD].processed += credits; 2944 2945} 2946 2947static void 2948check_ring_db(adapter_t *adap, struct sge_qset *qs, 2949 unsigned int sleeping) 2950{ 2951 ; 2952} 2953 2954/** 2955 * process_responses - process responses from an SGE response queue 2956 * @adap: the adapter 2957 * @qs: the queue set to which the response queue belongs 2958 * @budget: how many responses can be processed in this round 2959 * 2960 * Process responses from an SGE response queue up to the supplied budget. 2961 * Responses include received packets as well as credits and other events 2962 * for the queues that belong to the response queue's queue set. 2963 * A negative budget is effectively unlimited. 
2964 * 2965 * Additionally choose the interrupt holdoff time for the next interrupt 2966 * on this queue. If the system is under memory shortage use a fairly 2967 * long delay to help recovery. 2968 */ 2969static int 2970process_responses(adapter_t *adap, struct sge_qset *qs, int budget) 2971{ 2972 struct sge_rspq *rspq = &qs->rspq; 2973 struct rsp_desc *r = &rspq->desc[rspq->cidx]; 2974 int budget_left = budget; 2975 unsigned int sleeping = 0; 2976#ifdef LRO_SUPPORTED 2977 int lro_enabled = qs->lro.enabled; 2978 int skip_lro; 2979 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl; 2980#endif 2981 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE]; 2982 int ngathered = 0; 2983#ifdef DEBUG 2984 static int last_holdoff = 0; 2985 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) { 2986 printf("next_holdoff=%d\n", rspq->holdoff_tmr); 2987 last_holdoff = rspq->holdoff_tmr; 2988 } 2989#endif 2990 rspq->next_holdoff = rspq->holdoff_tmr; 2991 2992 while (__predict_true(budget_left && is_new_response(r, rspq))) { 2993 int eth, eop = 0, ethpad = 0; 2994 uint32_t flags = ntohl(r->flags); 2995 uint32_t rss_csum = *(const uint32_t *)r; 2996 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val); 2997 2998 eth = (r->rss_hdr.opcode == CPL_RX_PKT); 2999 3000 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { 3001 struct mbuf *m; 3002 3003 if (cxgb_debug) 3004 printf("async notification\n"); 3005 3006 if (rspq->rspq_mh.mh_head == NULL) { 3007 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA); 3008 m = rspq->rspq_mh.mh_head; 3009 } else { 3010 m = m_gethdr(M_DONTWAIT, MT_DATA); 3011 } 3012 if (m == NULL) 3013 goto no_mem; 3014 3015 memcpy(mtod(m, char *), r, AN_PKT_SIZE); 3016 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE; 3017 *mtod(m, char *) = CPL_ASYNC_NOTIF; 3018 rss_csum = htonl(CPL_ASYNC_NOTIF << 24); 3019 eop = 1; 3020 rspq->async_notif++; 3021 goto skip; 3022 } else if (flags & F_RSPD_IMM_DATA_VALID) { 3023 struct mbuf *m = NULL; 3024 3025 DPRINTF("IMM DATA VALID opcode=0x%x 
rspq->cidx=%d\n", 3026 r->rss_hdr.opcode, rspq->cidx); 3027 if (rspq->rspq_mh.mh_head == NULL) 3028 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA); 3029 else 3030 m = m_gethdr(M_DONTWAIT, MT_DATA); 3031 3032 if (rspq->rspq_mh.mh_head == NULL && m == NULL) { 3033 no_mem: 3034 rspq->next_holdoff = NOMEM_INTR_DELAY; 3035 budget_left--; 3036 break; 3037 } 3038 get_imm_packet(adap, r, rspq->rspq_mh.mh_head); 3039 eop = 1; 3040 rspq->imm_data++; 3041 } else if (r->len_cq) { 3042 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0; 3043 3044 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r); 3045 if (eop) { 3046 rspq->rspq_mh.mh_head->m_flags |= M_FLOWID; 3047 rspq->rspq_mh.mh_head->m_pkthdr.flowid = rss_hash; 3048 } 3049 3050 ethpad = 2; 3051 } else { 3052 rspq->pure_rsps++; 3053 } 3054 skip: 3055 if (flags & RSPD_CTRL_MASK) { 3056 sleeping |= flags & RSPD_GTS_MASK; 3057 handle_rsp_cntrl_info(qs, flags); 3058 } 3059 3060 r++; 3061 if (__predict_false(++rspq->cidx == rspq->size)) { 3062 rspq->cidx = 0; 3063 rspq->gen ^= 1; 3064 r = rspq->desc; 3065 } 3066 3067 if (++rspq->credits >= (rspq->size / 4)) { 3068 refill_rspq(adap, rspq, rspq->credits); 3069 rspq->credits = 0; 3070 } 3071 if (!eth && eop) { 3072 rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum; 3073 /* 3074 * XXX size mismatch 3075 */ 3076 m_set_priority(rspq->rspq_mh.mh_head, rss_hash); 3077 3078 3079 ngathered = rx_offload(&adap->tdev, rspq, 3080 rspq->rspq_mh.mh_head, offload_mbufs, ngathered); 3081 rspq->rspq_mh.mh_head = NULL; 3082 DPRINTF("received offload packet\n"); 3083 3084 } else if (eth && eop) { 3085 struct mbuf *m = rspq->rspq_mh.mh_head; 3086 3087 t3_rx_eth(adap, rspq, m, ethpad); 3088 3089#ifdef LRO_SUPPORTED 3090 /* 3091 * The T304 sends incoming packets on any qset. If LRO 3092 * is also enabled, we could end up sending packet up 3093 * lro_ctrl->ifp's input. That is incorrect. 3094 * 3095 * The mbuf's rcvif was derived from the cpl header and 3096 * is accurate. 
Skip LRO and just use that. 3097 */ 3098 skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif); 3099 3100 if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro && 3101 (tcp_lro_rx(lro_ctrl, m, 0) == 0)) { 3102 /* successfully queue'd for LRO */ 3103 } else 3104#endif 3105 { 3106 /* 3107 * LRO not enabled, packet unsuitable for LRO, 3108 * or unable to queue. Pass it up right now in 3109 * either case. 3110 */ 3111 struct ifnet *ifp = m->m_pkthdr.rcvif; 3112 (*ifp->if_input)(ifp, m); 3113 } 3114 rspq->rspq_mh.mh_head = NULL; 3115 3116 } 3117 __refill_fl_lt(adap, &qs->fl[0], 32); 3118 __refill_fl_lt(adap, &qs->fl[1], 32); 3119 --budget_left; 3120 } 3121 3122 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered); 3123 3124#ifdef LRO_SUPPORTED 3125 /* Flush LRO */ 3126 while (!SLIST_EMPTY(&lro_ctrl->lro_active)) { 3127 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active); 3128 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next); 3129 tcp_lro_flush(lro_ctrl, queued); 3130 } 3131#endif 3132 3133 if (sleeping) 3134 check_ring_db(adap, qs, sleeping); 3135 3136 mb(); /* commit Tx queue processed updates */ 3137 if (__predict_false(qs->txq_stopped > 1)) { 3138 printf("restarting tx on %p\n", qs); 3139 3140 restart_tx(qs); 3141 } 3142 3143 __refill_fl_lt(adap, &qs->fl[0], 512); 3144 __refill_fl_lt(adap, &qs->fl[1], 512); 3145 budget -= budget_left; 3146 return (budget); 3147} 3148 3149/* 3150 * A helper function that processes responses and issues GTS. 
3151 */ 3152static __inline int 3153process_responses_gts(adapter_t *adap, struct sge_rspq *rq) 3154{ 3155 int work; 3156 static int last_holdoff = 0; 3157 3158 work = process_responses(adap, rspq_to_qset(rq), -1); 3159 3160 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { 3161 printf("next_holdoff=%d\n", rq->next_holdoff); 3162 last_holdoff = rq->next_holdoff; 3163 } 3164 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | 3165 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); 3166 3167 return (work); 3168} 3169 3170 3171/* 3172 * Interrupt handler for legacy INTx interrupts for T3B-based cards. 3173 * Handles data events from SGE response queues as well as error and other 3174 * async events as they all use the same interrupt pin. We use one SGE 3175 * response queue per port in this mode and protect all response queues with 3176 * queue 0's lock. 3177 */ 3178void 3179t3b_intr(void *data) 3180{ 3181 uint32_t i, map; 3182 adapter_t *adap = data; 3183 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3184 3185 t3_write_reg(adap, A_PL_CLI, 0); 3186 map = t3_read_reg(adap, A_SG_DATA_INTR); 3187 3188 if (!map) 3189 return; 3190 3191 if (__predict_false(map & F_ERRINTR)) 3192 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3193 3194 mtx_lock(&q0->lock); 3195 for_each_port(adap, i) 3196 if (map & (1 << i)) 3197 process_responses_gts(adap, &adap->sge.qs[i].rspq); 3198 mtx_unlock(&q0->lock); 3199} 3200 3201/* 3202 * The MSI interrupt handler. This needs to handle data events from SGE 3203 * response queues as well as error and other async events as they all use 3204 * the same MSI vector. We use one SGE response queue per port in this mode 3205 * and protect all response queues with queue 0's lock. 
3206 */ 3207void 3208t3_intr_msi(void *data) 3209{ 3210 adapter_t *adap = data; 3211 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3212 int i, new_packets = 0; 3213 3214 mtx_lock(&q0->lock); 3215 3216 for_each_port(adap, i) 3217 if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 3218 new_packets = 1; 3219 mtx_unlock(&q0->lock); 3220 if (new_packets == 0) 3221 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3222} 3223 3224void 3225t3_intr_msix(void *data) 3226{ 3227 struct sge_qset *qs = data; 3228 adapter_t *adap = qs->port->adapter; 3229 struct sge_rspq *rspq = &qs->rspq; 3230 3231 if (process_responses_gts(adap, rspq) == 0) 3232 rspq->unhandled_irqs++; 3233} 3234 3235#define QDUMP_SBUF_SIZE 32 * 400 3236static int 3237t3_dump_rspq(SYSCTL_HANDLER_ARGS) 3238{ 3239 struct sge_rspq *rspq; 3240 struct sge_qset *qs; 3241 int i, err, dump_end, idx; 3242 static int multiplier = 1; 3243 struct sbuf *sb; 3244 struct rsp_desc *rspd; 3245 uint32_t data[4]; 3246 3247 rspq = arg1; 3248 qs = rspq_to_qset(rspq); 3249 if (rspq->rspq_dump_count == 0) 3250 return (0); 3251 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) { 3252 log(LOG_WARNING, 3253 "dump count is too large %d\n", rspq->rspq_dump_count); 3254 rspq->rspq_dump_count = 0; 3255 return (EINVAL); 3256 } 3257 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) { 3258 log(LOG_WARNING, 3259 "dump start of %d is greater than queue size\n", 3260 rspq->rspq_dump_start); 3261 rspq->rspq_dump_start = 0; 3262 return (EINVAL); 3263 } 3264 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data); 3265 if (err) 3266 return (err); 3267retry_sbufops: 3268 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 3269 3270 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n", 3271 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f), 3272 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1)); 3273 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n", 3274 ((data[2] >> 28) & 1), 
((data[2] >> 31) & 1), data[3]); 3275 3276 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start, 3277 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1)); 3278 3279 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count; 3280 for (i = rspq->rspq_dump_start; i < dump_end; i++) { 3281 idx = i & (RSPQ_Q_SIZE-1); 3282 3283 rspd = &rspq->desc[idx]; 3284 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n", 3285 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx, 3286 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx)); 3287 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n", 3288 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags), 3289 be32toh(rspd->len_cq), rspd->intr_gen); 3290 } 3291 if (sbuf_overflowed(sb)) { 3292 sbuf_delete(sb); 3293 multiplier++; 3294 goto retry_sbufops; 3295 } 3296 sbuf_finish(sb); 3297 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3298 sbuf_delete(sb); 3299 return (err); 3300} 3301 3302static int 3303t3_dump_txq_eth(SYSCTL_HANDLER_ARGS) 3304{ 3305 struct sge_txq *txq; 3306 struct sge_qset *qs; 3307 int i, j, err, dump_end; 3308 static int multiplier = 1; 3309 struct sbuf *sb; 3310 struct tx_desc *txd; 3311 uint32_t *WR, wr_hi, wr_lo, gen; 3312 uint32_t data[4]; 3313 3314 txq = arg1; 3315 qs = txq_to_qset(txq, TXQ_ETH); 3316 if (txq->txq_dump_count == 0) { 3317 return (0); 3318 } 3319 if (txq->txq_dump_count > TX_ETH_Q_SIZE) { 3320 log(LOG_WARNING, 3321 "dump count is too large %d\n", txq->txq_dump_count); 3322 txq->txq_dump_count = 1; 3323 return (EINVAL); 3324 } 3325 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) { 3326 log(LOG_WARNING, 3327 "dump start of %d is greater than queue size\n", 3328 txq->txq_dump_start); 3329 txq->txq_dump_start = 0; 3330 return (EINVAL); 3331 } 3332 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data); 3333 if (err) 3334 return (err); 3335 3336 3337retry_sbufops: 3338 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, 
SBUF_FIXEDLEN); 3339 3340 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n", 3341 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 3342 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1)); 3343 sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n", 3344 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1), 3345 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1)); 3346 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3347 txq->txq_dump_start, 3348 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1)); 3349 3350 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3351 for (i = txq->txq_dump_start; i < dump_end; i++) { 3352 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)]; 3353 WR = (uint32_t *)txd->flit; 3354 wr_hi = ntohl(WR[0]); 3355 wr_lo = ntohl(WR[1]); 3356 gen = G_WR_GEN(wr_lo); 3357 3358 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3359 wr_hi, wr_lo, gen); 3360 for (j = 2; j < 30; j += 4) 3361 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3362 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3363 3364 } 3365 if (sbuf_overflowed(sb)) { 3366 sbuf_delete(sb); 3367 multiplier++; 3368 goto retry_sbufops; 3369 } 3370 sbuf_finish(sb); 3371 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3372 sbuf_delete(sb); 3373 return (err); 3374} 3375 3376static int 3377t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS) 3378{ 3379 struct sge_txq *txq; 3380 struct sge_qset *qs; 3381 int i, j, err, dump_end; 3382 static int multiplier = 1; 3383 struct sbuf *sb; 3384 struct tx_desc *txd; 3385 uint32_t *WR, wr_hi, wr_lo, gen; 3386 3387 txq = arg1; 3388 qs = txq_to_qset(txq, TXQ_CTRL); 3389 if (txq->txq_dump_count == 0) { 3390 return (0); 3391 } 3392 if (txq->txq_dump_count > 256) { 3393 log(LOG_WARNING, 3394 "dump count is too large %d\n", txq->txq_dump_count); 3395 txq->txq_dump_count = 1; 3396 return (EINVAL); 3397 } 3398 if (txq->txq_dump_start > 255) { 3399 log(LOG_WARNING, 3400 "dump start of %d is 
greater than queue size\n", 3401 txq->txq_dump_start); 3402 txq->txq_dump_start = 0; 3403 return (EINVAL); 3404 } 3405 3406retry_sbufops: 3407 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 3408 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3409 txq->txq_dump_start, 3410 (txq->txq_dump_start + txq->txq_dump_count) & 255); 3411 3412 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3413 for (i = txq->txq_dump_start; i < dump_end; i++) { 3414 txd = &txq->desc[i & (255)]; 3415 WR = (uint32_t *)txd->flit; 3416 wr_hi = ntohl(WR[0]); 3417 wr_lo = ntohl(WR[1]); 3418 gen = G_WR_GEN(wr_lo); 3419 3420 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3421 wr_hi, wr_lo, gen); 3422 for (j = 2; j < 30; j += 4) 3423 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3424 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3425 3426 } 3427 if (sbuf_overflowed(sb)) { 3428 sbuf_delete(sb); 3429 multiplier++; 3430 goto retry_sbufops; 3431 } 3432 sbuf_finish(sb); 3433 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3434 sbuf_delete(sb); 3435 return (err); 3436} 3437 3438static int 3439t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS) 3440{ 3441 adapter_t *sc = arg1; 3442 struct qset_params *qsp = &sc->params.sge.qset[0]; 3443 int coalesce_usecs; 3444 struct sge_qset *qs; 3445 int i, j, err, nqsets = 0; 3446 struct mtx *lock; 3447 3448 if ((sc->flags & FULL_INIT_DONE) == 0) 3449 return (ENXIO); 3450 3451 coalesce_usecs = qsp->coalesce_usecs; 3452 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req); 3453 3454 if (err != 0) { 3455 return (err); 3456 } 3457 if (coalesce_usecs == qsp->coalesce_usecs) 3458 return (0); 3459 3460 for (i = 0; i < sc->params.nports; i++) 3461 for (j = 0; j < sc->port[i].nqsets; j++) 3462 nqsets++; 3463 3464 coalesce_usecs = max(1, coalesce_usecs); 3465 3466 for (i = 0; i < nqsets; i++) { 3467 qs = &sc->sge.qs[i]; 3468 qsp = &sc->params.sge.qset[i]; 3469 qsp->coalesce_usecs = coalesce_usecs; 3470 3471 lock = (sc->flags & USING_MSIX) ? 
	    &qs->rspq.lock :
		    &sc->sge.qs[0].rspq.lock;

		mtx_lock(lock);
		t3_update_qset_coalesce(qs, qsp);
		t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
		    V_NEWTIMER(qs->rspq.holdoff_tmr));
		mtx_unlock(lock);
	}

	return (0);
}


/*
 * Register the adapter-wide sysctl nodes that are available as soon as the
 * device attaches (firmware/hardware identification and debug knobs).
 */
void
t3_add_attach_sysctls(adapter_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
	    "firmware_version",
	    CTLFLAG_RD, &sc->fw_version,
	    0, "firmware version");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "hw_revision",
	    CTLFLAG_RD, &sc->params.rev,
	    0, "chip model");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
	    "port_types",
	    CTLFLAG_RD, &sc->port_types,
	    0, "type of ports");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "enable_debug",
	    CTLFLAG_RW, &cxgb_debug,
	    0, "enable verbose debugging output");
	SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tunq_coalesce",
	    CTLFLAG_RD, &sc->tunq_coalesce,
	    "#tunneled packets freed");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "txq_overrun",
	    CTLFLAG_RD, &txq_fills,
	    0, "#times txq overrun");
}


/* Names of the per-qset sysctl nodes created below. */
static const char *rspq_name = "rspq";
static const char *txq_names[] =
    {
	"txq_eth",
	"txq_ofld",
	"txq_ctrl"
    };

/*
 * Sysctl handler for a single 64-bit MAC statistic.  arg2 carries the byte
 * offset of the requested counter within struct mac_stats.  All counters
 * are refreshed from the hardware before the requested one is returned.
 */
static int
sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
{
	struct port_info *p = arg1;
	uint64_t *parg;

	if (!p)
		return (EINVAL);

	/* arg2 is the offset of the counter inside p->mac.stats. */
	parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
	PORT_LOCK(p);
	t3_mac_update_stats(&p->mac);
	PORT_UNLOCK(p);

	return (sysctl_handle_quad(oidp, parg, 0, req));
}

/*
 * Register the per-port and per-queue-set sysctl trees.  Called once the
 * number of configured ports/queue sets is known.
 */
void
t3_add_configured_sysctls(adapter_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	int i, j;

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "intr_coal",
	    CTLTYPE_INT|CTLFLAG_RW, sc,
	    0, t3_set_coalesce_usecs,
	    "I", "interrupt coalescing timer (us)");

	for (i = 0; i < sc->params.nports; i++) {
		struct port_info *pi = &sc->port[i];
		struct sysctl_oid *poid;
		struct sysctl_oid_list *poidlist;
		struct mac_stats *mstats = &pi->mac.stats;

		snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
		poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
		    pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
		poidlist = SYSCTL_CHILDREN(poid);
		SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO,
		    "nqsets", CTLFLAG_RD, &pi->nqsets,
		    0, "#queue sets");

		for (j = 0; j < pi->nqsets; j++) {
			struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
			struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
			    *ctrlqpoid, *lropoid;
			struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
			    *txqpoidlist, *ctrlqpoidlist,
			    *lropoidlist;
			struct sge_txq *txq = &qs->txq[TXQ_ETH];

			snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);

			qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
			    qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
			qspoidlist = SYSCTL_CHILDREN(qspoid);

			SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
					CTLFLAG_RD, &qs->fl[0].empty, 0,
					"freelist #0 empty");
			SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
					CTLFLAG_RD, &qs->fl[1].empty, 0,
					"freelist #1 empty");

			/* Child nodes for rspq, eth txq, ctrlq and LRO stats. */
			rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
			    rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
			rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);

			txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
			    txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
			txqpoidlist = SYSCTL_CHILDREN(txqpoid);

			ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
			    txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
			ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);

			lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
			    "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
			lropoidlist = SYSCTL_CHILDREN(lropoid);

			/* Response queue statistics and dump controls. */
			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
			    CTLFLAG_RD, &qs->rspq.size,
			    0, "#entries in response queue");
			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
			    CTLFLAG_RD, &qs->rspq.cidx,
			    0, "consumer index");
			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
			    CTLFLAG_RD, &qs->rspq.credits,
			    0, "#credits");
			SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
			    CTLFLAG_RD, &qs->rspq.phys_addr,
			    "physical_address_of the queue");
			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
			    CTLFLAG_RW, &qs->rspq.rspq_dump_start,
			    0, "start rspq dump entry");
			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
			    CTLFLAG_RW, &qs->rspq.rspq_dump_count,
			    0, "#rspq entries to dump");
			SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
			    CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
			    0, t3_dump_rspq, "A", "dump of the response queue");

			/* Ethernet transmit queue statistics. */
			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped",
			    CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops,
			    0, "#tunneled packets dropped");
			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
			    CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
			    0, "#tunneled packets waiting to be sent");
#if 0
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
			    0, "#tunneled packets queue producer index");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
			    0, "#tunneled packets queue consumer index");
#endif
			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
			    CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
			    0, "#tunneled packets processed by the card");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
			    CTLFLAG_RD, &txq->cleaned,
			    0, "#tunneled packets cleaned");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
			    CTLFLAG_RD, &txq->in_use,
			    0, "#tunneled packet slots in use");
			SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
			    CTLFLAG_RD, &txq->txq_frees,
			    "#tunneled packets freed");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
			    CTLFLAG_RD, &txq->txq_skipped,
			    0, "#tunneled packet descriptors skipped");
			SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
			    CTLFLAG_RD, &txq->txq_coalesced,
			    "#tunneled packets coalesced");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
			    CTLFLAG_RD, &txq->txq_enqueued,
			    0, "#tunneled packets enqueued to hardware");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
			    CTLFLAG_RD, &qs->txq_stopped,
			    0, "tx queues stopped");
			SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
			    CTLFLAG_RD, &txq->phys_addr,
			    "physical_address_of the queue");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
			    CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
			    0, "txq generation");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
			    CTLFLAG_RD, &txq->cidx,
			    0, "hardware queue cidx");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
			    CTLFLAG_RD, &txq->pidx,
			    0, "hardware queue pidx");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
			    0, "txq start idx for dump");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
			    0, "txq #entries to dump");
			SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
			    CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
			    0, t3_dump_txq_eth, "A", "dump of the transmit queue");

			/* Control queue dump controls. */
			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
			    0, "ctrlq start idx for dump");
			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
			    0, "ctrl #entries to dump");
			SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
			    CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
			    0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");

#ifdef LRO_SUPPORTED
			SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued",
			    CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
			SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed",
			    CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
			SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
			    CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
			SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
			    CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
#endif
		}

		/* Now add a node for mac stats. */
		poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
		    CTLFLAG_RD, NULL, "MAC statistics");
		poidlist = SYSCTL_CHILDREN(poid);

		/*
		 * We (ab)use the length argument (arg2) to pass on the offset
		 * of the data that we are interested in.  This is only required
		 * for the quad counters that are updated from the hardware (we
		 * make sure that we return the latest value).
		 * sysctl_handle_macstat first updates *all* the counters from
		 * the hardware, and then returns the latest value of the
		 * requested counter.  Best would be to update only the
		 * requested counter from hardware, but t3_mac_update_stats()
		 * hides all the register details and we don't want to dive into
		 * all that here.
		 */
#define CXGB_SYSCTL_ADD_QUAD(a)	SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
    (CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
    sysctl_handle_macstat, "QU", 0)
		CXGB_SYSCTL_ADD_QUAD(tx_octets);
		CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
		CXGB_SYSCTL_ADD_QUAD(tx_frames);
		CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
		CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
		CXGB_SYSCTL_ADD_QUAD(tx_pause);
		CXGB_SYSCTL_ADD_QUAD(tx_deferred);
		CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
		CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
		CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
		CXGB_SYSCTL_ADD_QUAD(tx_underrun);
		CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
		CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
		CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
		CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
		CXGB_SYSCTL_ADD_QUAD(rx_octets);
		CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
		CXGB_SYSCTL_ADD_QUAD(rx_frames);
		CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
		CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
		CXGB_SYSCTL_ADD_QUAD(rx_pause);
		CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_runt);
		CXGB_SYSCTL_ADD_QUAD(rx_jabber);
		CXGB_SYSCTL_ADD_QUAD(rx_short);
		CXGB_SYSCTL_ADD_QUAD(rx_too_long);
		CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
#undef CXGB_SYSCTL_ADD_QUAD

#define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
    CTLFLAG_RD, &mstats->a, 0)
		CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
		CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
		CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
		CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
		CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
		CXGB_SYSCTL_ADD_ULONG(num_toggled);
		CXGB_SYSCTL_ADD_ULONG(num_resets);
		CXGB_SYSCTL_ADD_ULONG(link_faults);
#undef CXGB_SYSCTL_ADD_ULONG
	}
}

/**
 *	t3_get_desc - dump an SGE descriptor for debugging purposes
 *	@qs: the queue set
 *	@qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
 *	@idx: the descriptor index in the queue
 *	@data: where to dump the descriptor contents
 *
 *	Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 *	size of the descriptor.
 */
int
t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
		unsigned char *data)
{
	/* Only queues 0..5 exist: 3 Tx, 1 response, 2 free lists. */
	if (qnum >= 6)
		return (EINVAL);

	if (qnum < 3) {
		/* Tx queues 0..2. */
		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
			/*
			 * NOTE(review): every other error path in this
			 * function returns a positive (EINVAL); this one
			 * returns -EINVAL (Linux convention).  Callers
			 * cannot rely on the sign of the error -- confirm
			 * caller expectations before unifying.
			 */
			return -EINVAL;
		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
		return sizeof(struct tx_desc);
	}

	if (qnum == 3) {
		/* Response queue. */
		if (!qs->rspq.desc || idx >= qs->rspq.size)
			return (EINVAL);
		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
		return sizeof(struct rsp_desc);
	}

	/* Free-list (Rx) queues 4..5. */
	qnum -= 4;
	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
		return (EINVAL);
	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
	return sizeof(struct rx_desc);
}