1/************************************************************************** 2 3Copyright (c) 2007, Chelsio Inc. 4All rights reserved. 5 6Redistribution and use in source and binary forms, with or without 7modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26POSSIBILITY OF SUCH DAMAGE. 27 28***************************************************************************/ 29 30#include <sys/cdefs.h>
| 1/************************************************************************** 2 3Copyright (c) 2007, Chelsio Inc. 4All rights reserved. 5 6Redistribution and use in source and binary forms, with or without 7modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26POSSIBILITY OF SUCH DAMAGE. 27 28***************************************************************************/ 29 30#include <sys/cdefs.h>
|
31__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_sge.c 170789 2007-06-15 20:02:02Z kmacy $");
| 31__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_sge.c 170869 2007-06-17 04:33:38Z kmacy $");
|
32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/kernel.h> 36#include <sys/module.h> 37#include <sys/bus.h> 38#include <sys/conf.h> 39#include <machine/bus.h> 40#include <machine/resource.h> 41#include <sys/bus_dma.h> 42#include <sys/rman.h> 43#include <sys/queue.h> 44#include <sys/sysctl.h> 45#include <sys/taskqueue.h> 46 47 48#include <sys/proc.h> 49#include <sys/sched.h> 50#include <sys/smp.h> 51#include <sys/systm.h> 52 53#include <netinet/in_systm.h> 54#include <netinet/in.h> 55#include <netinet/ip.h> 56#include <netinet/tcp.h> 57 58#include <dev/pci/pcireg.h> 59#include <dev/pci/pcivar.h> 60 61#ifdef CONFIG_DEFINED 62#include <cxgb_include.h> 63#else 64#include <dev/cxgb/cxgb_include.h> 65#endif 66 67uint32_t collapse_free = 0; 68uint32_t mb_free_vec_free = 0; 69int collapse_mbufs = 0; 70static int recycle_enable = 1; 71 72 73/* 74 * XXX GC 75 */ 76#define NET_XMIT_CN 2 77#define NET_XMIT_SUCCESS 0 78 79#define USE_GTS 0 80 81#define SGE_RX_SM_BUF_SIZE 1536 82#define SGE_RX_DROP_THRES 16 83#define SGE_RX_COPY_THRES 128 84 85/* 86 * Period of the Tx buffer reclaim timer. This timer does not need to run 87 * frequently as Tx buffers are usually reclaimed by new Tx packets. 
88 */ 89#define TX_RECLAIM_PERIOD (hz >> 1) 90 91/* 92 * work request size in bytes 93 */ 94#define WR_LEN (WR_FLITS * 8) 95 96/* 97 * Values for sge_txq.flags 98 */ 99enum { 100 TXQ_RUNNING = 1 << 0, /* fetch engine is running */ 101 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */ 102}; 103 104struct tx_desc { 105 uint64_t flit[TX_DESC_FLITS]; 106} __packed; 107 108struct rx_desc { 109 uint32_t addr_lo; 110 uint32_t len_gen; 111 uint32_t gen2; 112 uint32_t addr_hi; 113} __packed;; 114 115struct rsp_desc { /* response queue descriptor */ 116 struct rss_header rss_hdr; 117 uint32_t flags; 118 uint32_t len_cq; 119 uint8_t imm_data[47]; 120 uint8_t intr_gen; 121} __packed; 122 123#define RX_SW_DESC_MAP_CREATED (1 << 0) 124#define TX_SW_DESC_MAP_CREATED (1 << 1) 125#define RX_SW_DESC_INUSE (1 << 3) 126#define TX_SW_DESC_MAPPED (1 << 4) 127 128#define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0) 129#define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP) 130#define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP) 131#define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP) 132 133struct tx_sw_desc { /* SW state per Tx descriptor */ 134 struct mbuf *m; 135 bus_dmamap_t map; 136 int flags; 137}; 138 139struct rx_sw_desc { /* SW state per Rx descriptor */ 140 void *cl; 141 bus_dmamap_t map; 142 int flags; 143}; 144 145struct txq_state { 146 unsigned int compl; 147 unsigned int gen; 148 unsigned int pidx; 149}; 150 151struct refill_fl_cb_arg { 152 int error; 153 bus_dma_segment_t seg; 154 int nseg; 155}; 156 157/* 158 * Maps a number of flits to the number of Tx descriptors that can hold them. 159 * The formula is 160 * 161 * desc = 1 + (flits - 2) / (WR_FLITS - 1). 162 * 163 * HW allows up to 4 descriptors to be combined into a WR. 
 */
static uint8_t flit_desc_map[] = {
	0,
#if SGE_NUM_GENBITS == 1
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};


static int lro_default = 0;
int cxgb_debug = 0;

static void t3_free_qset(adapter_t *sc, struct sge_qset *q);
static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static int free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec);

/**
 *	reclaim_completed_tx - reclaims completed Tx descriptors
 *	@adapter: the adapter
 *	@q: the Tx queue to reclaim completed descriptors from
 *	@nbufs: capacity of @mvec; caps how many descriptors are reclaimed
 *	@mvec: output array that receives the mbufs of the freed descriptors
 *
 *	Reclaims Tx descriptors that the SGE has indicated it has processed,
 *	and frees the associated buffers if possible.  Called with the Tx
 *	queue's lock held.  Returns the number of mbufs placed in @mvec
 *	(the caller is responsible for freeing them).
 */
static __inline int
reclaim_completed_tx(adapter_t *adapter, struct sge_txq *q, int nbufs, struct mbuf **mvec)
{
	/* desc_reclaimable() tells us how many the HW has finished with */
	int reclaimed, reclaim = desc_reclaimable(q);
	int n = 0;

	mtx_assert(&q->lock, MA_OWNED);
	if (reclaim > 0) {
		n = free_tx_desc(adapter, q, min(reclaim, nbufs), mvec);
		/* accounting uses the requested count, capped by @nbufs */
		reclaimed = min(reclaim, nbufs);
		q->cleaned += reclaimed;
		q->in_use -= reclaimed;
	}
	return (n);
}

/**
 *	should_restart_tx - are there enough resources to restart a Tx queue?
 *	@q: the Tx queue
 *
 *	Checks if there are enough descriptors to restart a suspended Tx queue.
 */
static __inline int
should_restart_tx(const struct sge_txq *q)
{
	/* descriptors processed by HW but not yet cleaned by SW */
	unsigned int r = q->processed - q->cleaned;

	/* restart once less than half the ring is still in use */
	return q->in_use - r < (q->size >> 1);
}

/**
 *	t3_sge_init - initialize SGE
 *	@adap: the adapter
 *	@p: the SGE parameters
 *
 *	Performs SGE initialization needed every time after a chip reset.
 *	We do not initialize any of the queue sets here, instead the driver
 *	top-level must request those individually.  We also do not enable DMA
 *	here, that should be done after the queues have been set up.
 */
void
t3_sge_init(adapter_t *adap, struct sge_params *p)
{
	u_int ctrl, ups;

	ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */

	ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
	       F_CQCRDTCTRL |
	       V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
	       V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
#if SGE_NUM_GENBITS == 1
	ctrl |= F_EGRGENCTRL;
#endif
	if (adap->params.rev > 0) {
		/* rev > 0 parts can take one interrupt for multiple queues */
		if (!(adap->flags & (USING_MSIX | USING_MSI)))
			ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
		ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL;
	}
	t3_write_reg(adap, A_SG_CONTROL, ctrl);
	t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
		     V_LORCQDRBTHRSH(512));
	t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
	t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
		     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
	t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000);
	t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
	t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
	t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
	t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
	t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
}


/**
 *	sgl_len - calculates the size of an SGL of the given capacity
 *	@n: the number of SGL entries
 *
 *	Calculates the number of flits
 *	needed for a scatter/gather list that
 *	can hold the given number of entries.
 */
static __inline unsigned int
sgl_len(unsigned int n)
{
	/* each SGL entry is 1.5 flits; an odd count needs one extra flit */
	return ((3 * n) / 2 + (n & 1));
}

/**
 *	get_imm_packet - return the next ingress packet buffer from a response
 *	@sc: the adapter softc (unused here)
 *	@resp: the response descriptor containing the packet data
 *	@m: the mbuf to receive the data
 *	@cl: scratch buffer used for the EOP (continuation) case
 *
 *	Return a packet containing the immediate data of the given response.
 */
static __inline void
get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl)
{
	int len;
	uint32_t flags = ntohl(resp->flags);
	uint8_t sopeop = G_RSPD_SOP_EOP(flags);

	/*
	 * would be a firmware bug
	 */
	if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP)
		return;

	len = G_RSPD_LEN(ntohl(resp->len_cq));
	switch (sopeop) {
	case RSPQ_SOP_EOP:
		/* whole packet in immediate data: copy straight into the mbuf */
		m->m_len = m->m_pkthdr.len = len;
		memcpy(mtod(m, uint8_t *), resp->imm_data, len);
		break;
	case RSPQ_EOP:
		/* tail of a multi-descriptor packet: append via the iovec */
		memcpy(cl, resp->imm_data, len);
		m_iovappend(m, cl, MSIZE, len, 0);
		break;
	}
}


/* Map a flit count to the number of Tx descriptors needed (see table above). */
static __inline u_int
flits_to_desc(u_int n)
{
	return (flit_desc_map[n]);
}

/*
 * Handle SGE error interrupts: log credit overflow / disabled-queue events,
 * acknowledge the cause bits, and escalate fatal conditions.
 */
void
t3_sge_err_intr_handler(adapter_t *adapter)
{
	unsigned int v, status;


	status = t3_read_reg(adapter, A_SG_INT_CAUSE);

	if (status & F_RSPQCREDITOVERFOW)
		CH_ALERT(adapter, "SGE response queue credit overflow\n");

	if (status & F_RSPQDISABLED) {
		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);

		CH_ALERT(adapter,
			 "packet delivered to disabled response queue (0x%x)\n",
			 (v >> S_RSPQ0DISABLED) & 0xff);
	}

	/* write-to-clear the cause bits we saw */
	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
	if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED))
		t3_fatal_err(adapter);
}

/*
 * Fill in default SGE parameters (queue sizes, coalescing) for every
 * queue set, prior to any queue allocation.
 */
void
t3_sge_prep(adapter_t *adap, struct sge_params *p)
{
	int i;

	/* XXX Does ETHER_ALIGN need to be accounted for here? */
	p->max_pkt_size = MJUM16BYTES - sizeof(struct cpl_rx_data);

	for (i = 0; i < SGE_QSETS; ++i) {
		struct qset_params *q = p->qset + i;

		q->polling = adap->params.rev > 0;

		q->coalesce_nsecs = 5000;

		q->rspq_size = RSPQ_Q_SIZE;
		q->fl_size = FL_Q_SIZE;
		q->jumbo_size = JUMBO_Q_SIZE;
		q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
		q->txq_size[TXQ_OFLD] = 1024;
		q->txq_size[TXQ_CTRL] = 256;
		q->cong_thres = 0;
	}
}

/*
 * Create the adapter-wide DMA tag hierarchy: a parent tag plus child tags
 * for normal Rx, jumbo Rx, and Tx buffers.  Returns 0 or ENOMEM.
 */
int
t3_sge_alloc(adapter_t *sc)
{

	/* The parent tag. */
	if (bus_dma_tag_create( NULL,			/* parent */
				1, 0,			/* algnmnt, boundary */
				BUS_SPACE_MAXADDR,	/* lowaddr */
				BUS_SPACE_MAXADDR,	/* highaddr */
				NULL, NULL,		/* filter, filterarg */
				BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
				BUS_SPACE_UNRESTRICTED, /* nsegments */
				BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
				0,			/* flags */
				NULL, NULL,		/* lock, lockarg */
				&sc->parent_dmat)) {
		device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for normal sized RX frames
	 */
	if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
		MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
		device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for jumbo sized RX frames.
	 */
	if (bus_dma_tag_create(sc->parent_dmat, MJUMPAGESIZE, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, MJUMPAGESIZE, 1, MJUMPAGESIZE,
		BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
		device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for TX frames.
	 */
	if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
		TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
		NULL, NULL, &sc->tx_dmat)) {
		device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
		return (ENOMEM);
	}

	return (0);
}

/*
 * Tear down the DMA tags created by t3_sge_alloc(), children first.
 * Safe to call with partially-created tags (NULL entries are skipped).
 */
int
t3_sge_free(struct adapter * sc)
{

	if (sc->tx_dmat != NULL)
		bus_dma_tag_destroy(sc->tx_dmat);

	if (sc->rx_jumbo_dmat != NULL)
		bus_dma_tag_destroy(sc->rx_jumbo_dmat);

	if (sc->rx_dmat != NULL)
		bus_dma_tag_destroy(sc->rx_dmat);

	if (sc->parent_dmat != NULL)
		bus_dma_tag_destroy(sc->parent_dmat);

	return (0);
}

/*
 * Refresh a queue set's holdoff timer from the qset parameters.
 * Polling is forced off here (see the commented-out p->polling).
 */
void
t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
{

	/* coalesce_nsecs is in units of 100ns hardware ticks; minimum 1 */
	qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U);
	qs->rspq.polling = 0 /* p->polling */;
}

/*
 * bus_dmamap_load() callback: stash the single mapped segment (and any
 * error) in the caller's refill_fl_cb_arg.
 */
static void
refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct refill_fl_cb_arg *cb_arg = arg;

	cb_arg->error = error;
	cb_arg->seg = segs[0];
	cb_arg->nseg = nseg;

}

/**
 *	refill_fl - refill an SGE free-buffer list
 *	@sc: the controller softc
 *	@q: the free-list to refill
 *	@n: the number of new buffers to allocate
 *
 *	(Re)populate an SGE free-buffer list with up to @n new packet buffers.
 *	The caller must assure that @n does not exceed the queue's capacity.
475 */ 476static void 477refill_fl(adapter_t *sc, struct sge_fl *q, int n) 478{ 479 struct rx_sw_desc *sd = &q->sdesc[q->pidx]; 480 struct rx_desc *d = &q->desc[q->pidx]; 481 struct refill_fl_cb_arg cb_arg; 482 void *cl; 483 int err; 484 485 cb_arg.error = 0; 486 while (n--) { 487 /* 488 * We only allocate a cluster, mbuf allocation happens after rx 489 */ 490 if ((cl = m_cljget(NULL, M_DONTWAIT, q->buf_size)) == NULL) { 491 log(LOG_WARNING, "Failed to allocate cluster\n"); 492 goto done; 493 } 494 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) { 495 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) { 496 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); 497 uma_zfree(q->zone, cl); 498 goto done; 499 } 500 sd->flags |= RX_SW_DESC_MAP_CREATED; 501 } 502 err = bus_dmamap_load(q->entry_tag, sd->map, cl, q->buf_size, 503 refill_fl_cb, &cb_arg, 0); 504 505 if (err != 0 || cb_arg.error) { 506 log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error); 507 /* 508 * XXX free cluster 509 */ 510 return; 511 } 512 513 sd->flags |= RX_SW_DESC_INUSE; 514 sd->cl = cl; 515 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff); 516 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff); 517 d->len_gen = htobe32(V_FLD_GEN1(q->gen)); 518 d->gen2 = htobe32(V_FLD_GEN2(q->gen)); 519 520 d++; 521 sd++; 522 523 if (++q->pidx == q->size) { 524 q->pidx = 0; 525 q->gen ^= 1; 526 sd = q->sdesc; 527 d = q->desc; 528 } 529 q->credits++; 530 } 531 532done: 533 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 534} 535 536 537/** 538 * free_rx_bufs - free the Rx buffers on an SGE free list 539 * @sc: the controle softc 540 * @q: the SGE free list to clean up 541 * 542 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from 543 * this queue should be stopped before calling this function. 
 */
static void
free_rx_bufs(adapter_t *sc, struct sge_fl *q)
{
	u_int cidx = q->cidx;

	while (q->credits--) {
		struct rx_sw_desc *d = &q->sdesc[cidx];

		if (d->flags & RX_SW_DESC_INUSE) {
			bus_dmamap_unload(q->entry_tag, d->map);
			bus_dmamap_destroy(q->entry_tag, d->map);
			uma_zfree(q->zone, d->cl);
		}
		d->cl = NULL;
		if (++cidx == q->size)
			cidx = 0;
	}
}

/* Top up @fl by at most 16 buffers, never beyond its capacity. */
static __inline void
__refill_fl(adapter_t *adap, struct sge_fl *fl)
{
	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
}

/**
 *	recycle_rx_buf - recycle a receive buffer
 *	@adap: the adapter
 *	@q: the SGE free list
 *	@idx: index of buffer to recycle
 *
 *	Recycles the specified buffer on the given free list by adding it at
 *	the next available slot on the list.
 */
static void
recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
{
	struct rx_desc *from = &q->desc[idx];
	struct rx_desc *to   = &q->desc[q->pidx];

	q->sdesc[q->pidx] = q->sdesc[idx];
	to->addr_lo = from->addr_lo;	// already big endian
	to->addr_hi = from->addr_hi;	// likewise
	/* address must be visible before the generation bits (HW ordering) */
	wmb();
	to->len_gen = htobe32(V_FLD_GEN1(q->gen));
	to->gen2 = htobe32(V_FLD_GEN2(q->gen));
	q->credits++;

	if (++q->pidx == q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}

/*
 * bus_dmamap_load() callback for alloc_ring(): store the ring's bus address.
 *
 * NOTE(review): 'arg' is alloc_ring()'s 'phys' (a bus_addr_t *) but is
 * written through a uint32_t *; on platforms where bus_addr_t is 64-bit
 * this would truncate the address — confirm against supported platforms.
 */
static void
alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
	uint32_t *addr;

	addr = arg;
	*addr = segs[0].ds_addr;
}

/*
 * Allocate the DMA-able descriptor ring (and optional parallel software
 * descriptor array) for one SGE queue: create a tag, allocate and load the
 * ring memory, return its bus address in *phys and virtual address in *desc.
 * If @parent_entry_tag is non-NULL, also create the per-entry buffer tag.
 *
 * NOTE(review): the error paths after tag/memory creation do not release
 * what was already allocated; presumably t3_free_qset() cleans up — verify.
 */
static int
alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
    bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
    bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
{
	size_t len = nelem * elem_size;
	void *s = NULL;
	void *p = NULL;
	int err;

	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
				      BUS_SPACE_MAXADDR_32BIT,
				      BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
				      len, 0, NULL, NULL, tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
		return (ENOMEM);
	}

	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
				    map)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
		return (ENOMEM);
	}

	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
	bzero(p, len);
	*(void **)desc = p;

	if (sw_size) {
		len = nelem * sw_size;
		s = malloc(len, M_DEVBUF, M_WAITOK);
		bzero(s, len);
		*(void **)sdesc = s;
	}
	if (parent_entry_tag == NULL)
		return (0);

	if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
				      BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
				      NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
				      TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
				      NULL, NULL, entry_tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
		return (ENOMEM);
	}
	return (0);
}

/* Taskqueue wrapper for the slow-path (error/async) interrupt handler. */
static void
sge_slow_intr_handler(void *arg, int ncount)
{
	adapter_t *sc = arg;

	t3_slow_intr_handler(sc);
}

/*
 * Periodic timer callback: scan every queue set and, if any Tx queue has
 * descriptors to reclaim or a free list is short of credits, enqueue the
 * port's reclaim task.  (Continues past the end of this chunk.)
 */
static void
sge_timer_cb(void *arg)
{
	adapter_t *sc = arg;
	struct port_info *p;
	struct sge_qset *qs;
	struct sge_txq  *txq;
	int i, j;
	int reclaim_eth, reclaim_ofl, refill_rx;

	for (i = 0; i < sc->params.nports; i++)
		for (j = 0; j < sc->port[i].nqsets; j++) {
			/*
			 * NOTE(review): qs[i + j] looks like it should be
			 * qs[i * nqsets + j] when ports have multiple queue
			 * sets — confirm intended qset layout.
			 */
			qs = &sc->sge.qs[i + j];
			txq = &qs->txq[0];
			reclaim_eth = txq[TXQ_ETH].processed - txq[TXQ_ETH].cleaned;
			reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
			refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
			    (qs->fl[1].credits < qs->fl[1].size));
			if (reclaim_eth || reclaim_ofl || refill_rx) {
				p = &sc->port[i];
				taskqueue_enqueue(p->tq, &p->timer_reclaim_task);
				break;
			}
		}
| 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/kernel.h> 36#include <sys/module.h> 37#include <sys/bus.h> 38#include <sys/conf.h> 39#include <machine/bus.h> 40#include <machine/resource.h> 41#include <sys/bus_dma.h> 42#include <sys/rman.h> 43#include <sys/queue.h> 44#include <sys/sysctl.h> 45#include <sys/taskqueue.h> 46 47 48#include <sys/proc.h> 49#include <sys/sched.h> 50#include <sys/smp.h> 51#include <sys/systm.h> 52 53#include <netinet/in_systm.h> 54#include <netinet/in.h> 55#include <netinet/ip.h> 56#include <netinet/tcp.h> 57 58#include <dev/pci/pcireg.h> 59#include <dev/pci/pcivar.h> 60 61#ifdef CONFIG_DEFINED 62#include <cxgb_include.h> 63#else 64#include <dev/cxgb/cxgb_include.h> 65#endif 66 67uint32_t collapse_free = 0; 68uint32_t mb_free_vec_free = 0; 69int collapse_mbufs = 0; 70static int recycle_enable = 1; 71 72 73/* 74 * XXX GC 75 */ 76#define NET_XMIT_CN 2 77#define NET_XMIT_SUCCESS 0 78 79#define USE_GTS 0 80 81#define SGE_RX_SM_BUF_SIZE 1536 82#define SGE_RX_DROP_THRES 16 83#define SGE_RX_COPY_THRES 128 84 85/* 86 * Period of the Tx buffer reclaim timer. This timer does not need to run 87 * frequently as Tx buffers are usually reclaimed by new Tx packets. 
88 */ 89#define TX_RECLAIM_PERIOD (hz >> 1) 90 91/* 92 * work request size in bytes 93 */ 94#define WR_LEN (WR_FLITS * 8) 95 96/* 97 * Values for sge_txq.flags 98 */ 99enum { 100 TXQ_RUNNING = 1 << 0, /* fetch engine is running */ 101 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */ 102}; 103 104struct tx_desc { 105 uint64_t flit[TX_DESC_FLITS]; 106} __packed; 107 108struct rx_desc { 109 uint32_t addr_lo; 110 uint32_t len_gen; 111 uint32_t gen2; 112 uint32_t addr_hi; 113} __packed;; 114 115struct rsp_desc { /* response queue descriptor */ 116 struct rss_header rss_hdr; 117 uint32_t flags; 118 uint32_t len_cq; 119 uint8_t imm_data[47]; 120 uint8_t intr_gen; 121} __packed; 122 123#define RX_SW_DESC_MAP_CREATED (1 << 0) 124#define TX_SW_DESC_MAP_CREATED (1 << 1) 125#define RX_SW_DESC_INUSE (1 << 3) 126#define TX_SW_DESC_MAPPED (1 << 4) 127 128#define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0) 129#define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP) 130#define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP) 131#define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP) 132 133struct tx_sw_desc { /* SW state per Tx descriptor */ 134 struct mbuf *m; 135 bus_dmamap_t map; 136 int flags; 137}; 138 139struct rx_sw_desc { /* SW state per Rx descriptor */ 140 void *cl; 141 bus_dmamap_t map; 142 int flags; 143}; 144 145struct txq_state { 146 unsigned int compl; 147 unsigned int gen; 148 unsigned int pidx; 149}; 150 151struct refill_fl_cb_arg { 152 int error; 153 bus_dma_segment_t seg; 154 int nseg; 155}; 156 157/* 158 * Maps a number of flits to the number of Tx descriptors that can hold them. 159 * The formula is 160 * 161 * desc = 1 + (flits - 2) / (WR_FLITS - 1). 162 * 163 * HW allows up to 4 descriptors to be combined into a WR. 
164 */ 165static uint8_t flit_desc_map[] = { 166 0, 167#if SGE_NUM_GENBITS == 1 168 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 169 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 170 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 171 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 172#elif SGE_NUM_GENBITS == 2 173 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 174 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 175 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 176 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 177#else 178# error "SGE_NUM_GENBITS must be 1 or 2" 179#endif 180}; 181 182 183static int lro_default = 0; 184int cxgb_debug = 0; 185 186static void t3_free_qset(adapter_t *sc, struct sge_qset *q); 187static void sge_timer_cb(void *arg); 188static void sge_timer_reclaim(void *arg, int ncount); 189static int free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec); 190 191/** 192 * reclaim_completed_tx - reclaims completed Tx descriptors 193 * @adapter: the adapter 194 * @q: the Tx queue to reclaim completed descriptors from 195 * 196 * Reclaims Tx descriptors that the SGE has indicated it has processed, 197 * and frees the associated buffers if possible. Called with the Tx 198 * queue's lock held. 199 */ 200static __inline int 201reclaim_completed_tx(adapter_t *adapter, struct sge_txq *q, int nbufs, struct mbuf **mvec) 202{ 203 int reclaimed, reclaim = desc_reclaimable(q); 204 int n = 0; 205 206 mtx_assert(&q->lock, MA_OWNED); 207 if (reclaim > 0) { 208 n = free_tx_desc(adapter, q, min(reclaim, nbufs), mvec); 209 reclaimed = min(reclaim, nbufs); 210 q->cleaned += reclaimed; 211 q->in_use -= reclaimed; 212 } 213 return (n); 214} 215 216/** 217 * should_restart_tx - are there enough resources to restart a Tx queue? 218 * @q: the Tx queue 219 * 220 * Checks if there are enough descriptors to restart a suspended Tx queue. 
221 */ 222static __inline int 223should_restart_tx(const struct sge_txq *q) 224{ 225 unsigned int r = q->processed - q->cleaned; 226 227 return q->in_use - r < (q->size >> 1); 228} 229 230/** 231 * t3_sge_init - initialize SGE 232 * @adap: the adapter 233 * @p: the SGE parameters 234 * 235 * Performs SGE initialization needed every time after a chip reset. 236 * We do not initialize any of the queue sets here, instead the driver 237 * top-level must request those individually. We also do not enable DMA 238 * here, that should be done after the queues have been set up. 239 */ 240void 241t3_sge_init(adapter_t *adap, struct sge_params *p) 242{ 243 u_int ctrl, ups; 244 245 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ 246 247 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | 248 F_CQCRDTCTRL | 249 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | 250 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING; 251#if SGE_NUM_GENBITS == 1 252 ctrl |= F_EGRGENCTRL; 253#endif 254 if (adap->params.rev > 0) { 255 if (!(adap->flags & (USING_MSIX | USING_MSI))) 256 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; 257 ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL; 258 } 259 t3_write_reg(adap, A_SG_CONTROL, ctrl); 260 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | 261 V_LORCQDRBTHRSH(512)); 262 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); 263 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | 264 V_TIMEOUT(200 * core_ticks_per_usec(adap))); 265 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000); 266 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); 267 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); 268 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); 269 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); 270 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); 271} 272 273 274/** 275 * sgl_len - calculates the size of an SGL of the given capacity 276 * @n: the number of SGL entries 277 * 278 * Calculates the number of flits 
needed for a scatter/gather list that 279 * can hold the given number of entries. 280 */ 281static __inline unsigned int 282sgl_len(unsigned int n) 283{ 284 return ((3 * n) / 2 + (n & 1)); 285} 286 287/** 288 * get_imm_packet - return the next ingress packet buffer from a response 289 * @resp: the response descriptor containing the packet data 290 * 291 * Return a packet containing the immediate data of the given response. 292 */ 293static __inline void 294get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl) 295{ 296 int len; 297 uint32_t flags = ntohl(resp->flags); 298 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 299 300 /* 301 * would be a firmware bug 302 */ 303 if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP) 304 return; 305 306 len = G_RSPD_LEN(ntohl(resp->len_cq)); 307 switch (sopeop) { 308 case RSPQ_SOP_EOP: 309 m->m_len = m->m_pkthdr.len = len; 310 memcpy(mtod(m, uint8_t *), resp->imm_data, len); 311 break; 312 case RSPQ_EOP: 313 memcpy(cl, resp->imm_data, len); 314 m_iovappend(m, cl, MSIZE, len, 0); 315 break; 316 } 317} 318 319 320static __inline u_int 321flits_to_desc(u_int n) 322{ 323 return (flit_desc_map[n]); 324} 325 326void 327t3_sge_err_intr_handler(adapter_t *adapter) 328{ 329 unsigned int v, status; 330 331 332 status = t3_read_reg(adapter, A_SG_INT_CAUSE); 333 334 if (status & F_RSPQCREDITOVERFOW) 335 CH_ALERT(adapter, "SGE response queue credit overflow\n"); 336 337 if (status & F_RSPQDISABLED) { 338 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); 339 340 CH_ALERT(adapter, 341 "packet delivered to disabled response queue (0x%x)\n", 342 (v >> S_RSPQ0DISABLED) & 0xff); 343 } 344 345 t3_write_reg(adapter, A_SG_INT_CAUSE, status); 346 if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED)) 347 t3_fatal_err(adapter); 348} 349 350void 351t3_sge_prep(adapter_t *adap, struct sge_params *p) 352{ 353 int i; 354 355 /* XXX Does ETHER_ALIGN need to be accounted for here? 
*/ 356 p->max_pkt_size = MJUM16BYTES - sizeof(struct cpl_rx_data); 357 358 for (i = 0; i < SGE_QSETS; ++i) { 359 struct qset_params *q = p->qset + i; 360 361 q->polling = adap->params.rev > 0; 362 363 q->coalesce_nsecs = 5000; 364 365 q->rspq_size = RSPQ_Q_SIZE; 366 q->fl_size = FL_Q_SIZE; 367 q->jumbo_size = JUMBO_Q_SIZE; 368 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; 369 q->txq_size[TXQ_OFLD] = 1024; 370 q->txq_size[TXQ_CTRL] = 256; 371 q->cong_thres = 0; 372 } 373} 374 375int 376t3_sge_alloc(adapter_t *sc) 377{ 378 379 /* The parent tag. */ 380 if (bus_dma_tag_create( NULL, /* parent */ 381 1, 0, /* algnmnt, boundary */ 382 BUS_SPACE_MAXADDR, /* lowaddr */ 383 BUS_SPACE_MAXADDR, /* highaddr */ 384 NULL, NULL, /* filter, filterarg */ 385 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 386 BUS_SPACE_UNRESTRICTED, /* nsegments */ 387 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 388 0, /* flags */ 389 NULL, NULL, /* lock, lockarg */ 390 &sc->parent_dmat)) { 391 device_printf(sc->dev, "Cannot allocate parent DMA tag\n"); 392 return (ENOMEM); 393 } 394 395 /* 396 * DMA tag for normal sized RX frames 397 */ 398 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR, 399 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, 400 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) { 401 device_printf(sc->dev, "Cannot allocate RX DMA tag\n"); 402 return (ENOMEM); 403 } 404 405 /* 406 * DMA tag for jumbo sized RX frames. 407 */ 408 if (bus_dma_tag_create(sc->parent_dmat, MJUMPAGESIZE, 0, BUS_SPACE_MAXADDR, 409 BUS_SPACE_MAXADDR, NULL, NULL, MJUMPAGESIZE, 1, MJUMPAGESIZE, 410 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) { 411 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n"); 412 return (ENOMEM); 413 } 414 415 /* 416 * DMA tag for TX frames. 
417 */ 418 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR, 419 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 420 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 421 NULL, NULL, &sc->tx_dmat)) { 422 device_printf(sc->dev, "Cannot allocate TX DMA tag\n"); 423 return (ENOMEM); 424 } 425 426 return (0); 427} 428 429int 430t3_sge_free(struct adapter * sc) 431{ 432 433 if (sc->tx_dmat != NULL) 434 bus_dma_tag_destroy(sc->tx_dmat); 435 436 if (sc->rx_jumbo_dmat != NULL) 437 bus_dma_tag_destroy(sc->rx_jumbo_dmat); 438 439 if (sc->rx_dmat != NULL) 440 bus_dma_tag_destroy(sc->rx_dmat); 441 442 if (sc->parent_dmat != NULL) 443 bus_dma_tag_destroy(sc->parent_dmat); 444 445 return (0); 446} 447 448void 449t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) 450{ 451 452 qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U); 453 qs->rspq.polling = 0 /* p->polling */; 454} 455 456static void 457refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 458{ 459 struct refill_fl_cb_arg *cb_arg = arg; 460 461 cb_arg->error = error; 462 cb_arg->seg = segs[0]; 463 cb_arg->nseg = nseg; 464 465} 466 467/** 468 * refill_fl - refill an SGE free-buffer list 469 * @sc: the controller softc 470 * @q: the free-list to refill 471 * @n: the number of new buffers to allocate 472 * 473 * (Re)populate an SGE free-buffer list with up to @n new packet buffers. 474 * The caller must assure that @n does not exceed the queue's capacity. 
475 */ 476static void 477refill_fl(adapter_t *sc, struct sge_fl *q, int n) 478{ 479 struct rx_sw_desc *sd = &q->sdesc[q->pidx]; 480 struct rx_desc *d = &q->desc[q->pidx]; 481 struct refill_fl_cb_arg cb_arg; 482 void *cl; 483 int err; 484 485 cb_arg.error = 0; 486 while (n--) { 487 /* 488 * We only allocate a cluster, mbuf allocation happens after rx 489 */ 490 if ((cl = m_cljget(NULL, M_DONTWAIT, q->buf_size)) == NULL) { 491 log(LOG_WARNING, "Failed to allocate cluster\n"); 492 goto done; 493 } 494 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) { 495 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) { 496 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); 497 uma_zfree(q->zone, cl); 498 goto done; 499 } 500 sd->flags |= RX_SW_DESC_MAP_CREATED; 501 } 502 err = bus_dmamap_load(q->entry_tag, sd->map, cl, q->buf_size, 503 refill_fl_cb, &cb_arg, 0); 504 505 if (err != 0 || cb_arg.error) { 506 log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error); 507 /* 508 * XXX free cluster 509 */ 510 return; 511 } 512 513 sd->flags |= RX_SW_DESC_INUSE; 514 sd->cl = cl; 515 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff); 516 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff); 517 d->len_gen = htobe32(V_FLD_GEN1(q->gen)); 518 d->gen2 = htobe32(V_FLD_GEN2(q->gen)); 519 520 d++; 521 sd++; 522 523 if (++q->pidx == q->size) { 524 q->pidx = 0; 525 q->gen ^= 1; 526 sd = q->sdesc; 527 d = q->desc; 528 } 529 q->credits++; 530 } 531 532done: 533 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 534} 535 536 537/** 538 * free_rx_bufs - free the Rx buffers on an SGE free list 539 * @sc: the controle softc 540 * @q: the SGE free list to clean up 541 * 542 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from 543 * this queue should be stopped before calling this function. 
544 */ 545static void 546free_rx_bufs(adapter_t *sc, struct sge_fl *q) 547{ 548 u_int cidx = q->cidx; 549 550 while (q->credits--) { 551 struct rx_sw_desc *d = &q->sdesc[cidx]; 552 553 if (d->flags & RX_SW_DESC_INUSE) { 554 bus_dmamap_unload(q->entry_tag, d->map); 555 bus_dmamap_destroy(q->entry_tag, d->map); 556 uma_zfree(q->zone, d->cl); 557 } 558 d->cl = NULL; 559 if (++cidx == q->size) 560 cidx = 0; 561 } 562} 563 564static __inline void 565__refill_fl(adapter_t *adap, struct sge_fl *fl) 566{ 567 refill_fl(adap, fl, min(16U, fl->size - fl->credits)); 568} 569 570/** 571 * recycle_rx_buf - recycle a receive buffer 572 * @adapter: the adapter 573 * @q: the SGE free list 574 * @idx: index of buffer to recycle 575 * 576 * Recycles the specified buffer on the given free list by adding it at 577 * the next available slot on the list. 578 */ 579static void 580recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx) 581{ 582 struct rx_desc *from = &q->desc[idx]; 583 struct rx_desc *to = &q->desc[q->pidx]; 584 585 q->sdesc[q->pidx] = q->sdesc[idx]; 586 to->addr_lo = from->addr_lo; // already big endian 587 to->addr_hi = from->addr_hi; // likewise 588 wmb(); 589 to->len_gen = htobe32(V_FLD_GEN1(q->gen)); 590 to->gen2 = htobe32(V_FLD_GEN2(q->gen)); 591 q->credits++; 592 593 if (++q->pidx == q->size) { 594 q->pidx = 0; 595 q->gen ^= 1; 596 } 597 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 598} 599 600static void 601alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) 602{ 603 uint32_t *addr; 604 605 addr = arg; 606 *addr = segs[0].ds_addr; 607} 608 609static int 610alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, 611 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag, 612 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag) 613{ 614 size_t len = nelem * elem_size; 615 void *s = NULL; 616 void *p = NULL; 617 int err; 618 619 if ((err = 
bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0, 620 BUS_SPACE_MAXADDR_32BIT, 621 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, 622 len, 0, NULL, NULL, tag)) != 0) { 623 device_printf(sc->dev, "Cannot allocate descriptor tag\n"); 624 return (ENOMEM); 625 } 626 627 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT, 628 map)) != 0) { 629 device_printf(sc->dev, "Cannot allocate descriptor memory\n"); 630 return (ENOMEM); 631 } 632 633 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0); 634 bzero(p, len); 635 *(void **)desc = p; 636 637 if (sw_size) { 638 len = nelem * sw_size; 639 s = malloc(len, M_DEVBUF, M_WAITOK); 640 bzero(s, len); 641 *(void **)sdesc = s; 642 } 643 if (parent_entry_tag == NULL) 644 return (0); 645 646 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0, 647 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, 648 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 649 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 650 NULL, NULL, entry_tag)) != 0) { 651 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n"); 652 return (ENOMEM); 653 } 654 return (0); 655} 656 657static void 658sge_slow_intr_handler(void *arg, int ncount) 659{ 660 adapter_t *sc = arg; 661 662 t3_slow_intr_handler(sc); 663} 664 665static void 666sge_timer_cb(void *arg) 667{ 668 adapter_t *sc = arg; 669 struct port_info *p; 670 struct sge_qset *qs; 671 struct sge_txq *txq; 672 int i, j; 673 int reclaim_eth, reclaim_ofl, refill_rx; 674 675 for (i = 0; i < sc->params.nports; i++) 676 for (j = 0; j < sc->port[i].nqsets; j++) { 677 qs = &sc->sge.qs[i + j]; 678 txq = &qs->txq[0]; 679 reclaim_eth = txq[TXQ_ETH].processed - txq[TXQ_ETH].cleaned; 680 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; 681 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 682 (qs->fl[1].credits < qs->fl[1].size)); 683 if (reclaim_eth || reclaim_ofl || refill_rx) { 684 p = &sc->port[i]; 685 taskqueue_enqueue(p->tq, &p->timer_reclaim_task); 686 break; 687 } 688 }
|
689 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
| 689 if (sc->open_device_map != 0) 690 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
|
690} 691 692/* 693 * This is meant to be a catch-all function to keep sge state private 694 * to sge.c 695 * 696 */ 697int 698t3_sge_init_adapter(adapter_t *sc) 699{ 700 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE); 701 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 702 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc); 703 return (0); 704} 705 706int 707t3_sge_init_port(struct port_info *p) 708{ 709 TASK_INIT(&p->timer_reclaim_task, 0, sge_timer_reclaim, p); 710 return (0); 711} 712 713void 714t3_sge_deinit_sw(adapter_t *sc) 715{ 716 int i; 717 718 callout_drain(&sc->sge_timer_ch); 719 if (sc->tq) 720 taskqueue_drain(sc->tq, &sc->slow_intr_task); 721 for (i = 0; i < sc->params.nports; i++) 722 if (sc->port[i].tq != NULL) 723 taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task); 724} 725 726/** 727 * refill_rspq - replenish an SGE response queue 728 * @adapter: the adapter 729 * @q: the response queue to replenish 730 * @credits: how many new responses to make available 731 * 732 * Replenishes a response queue by making the supplied number of responses 733 * available to HW. 734 */ 735static __inline void 736refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) 737{ 738 739 /* mbufs are allocated on demand when a rspq entry is processed. 
*/ 740 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, 741 V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); 742} 743 744 745static void 746sge_timer_reclaim(void *arg, int ncount) 747{ 748 struct port_info *p = arg; 749 int i, nqsets = p->nqsets; 750 adapter_t *sc = p->adapter; 751 struct sge_qset *qs; 752 struct sge_txq *txq; 753 struct mtx *lock; 754 struct mbuf *m_vec[TX_CLEAN_MAX_DESC]; 755 int n, reclaimable; 756 757 for (i = 0; i < nqsets; i++) { 758 qs = &sc->sge.qs[i]; 759 txq = &qs->txq[TXQ_ETH]; 760 reclaimable = desc_reclaimable(txq); 761 if (reclaimable > 0) { 762 mtx_lock(&txq->lock); 763 n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec); 764 mtx_unlock(&txq->lock); 765 766 for (i = 0; i < n; i++) 767 m_freem_vec(m_vec[i]); 768 769 if (p->ifp->if_drv_flags & IFF_DRV_OACTIVE && 770 txq->size - txq->in_use >= TX_START_MAX_DESC) { 771 p->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 772 taskqueue_enqueue(p->tq, &p->start_task); 773 } 774 } 775 776 txq = &qs->txq[TXQ_OFLD]; 777 reclaimable = desc_reclaimable(txq); 778 if (reclaimable > 0) { 779 mtx_lock(&txq->lock); 780 n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec); 781 mtx_unlock(&txq->lock); 782 783 for (i = 0; i < n; i++) 784 m_freem_vec(m_vec[i]); 785 } 786 787 lock = (sc->flags & USING_MSIX) ? 
&qs->rspq.lock : 788 &sc->sge.qs[0].rspq.lock; 789 790 if (mtx_trylock(lock)) { 791 /* XXX currently assume that we are *NOT* polling */ 792 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); 793 794 if (qs->fl[0].credits < qs->fl[0].size - 16) 795 __refill_fl(sc, &qs->fl[0]); 796 if (qs->fl[1].credits < qs->fl[1].size - 16) 797 __refill_fl(sc, &qs->fl[1]); 798 799 if (status & (1 << qs->rspq.cntxt_id)) { 800 if (qs->rspq.credits) { 801 refill_rspq(sc, &qs->rspq, 1); 802 qs->rspq.credits--; 803 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 804 1 << qs->rspq.cntxt_id); 805 } 806 } 807 mtx_unlock(lock); 808 } 809 } 810} 811 812/** 813 * init_qset_cntxt - initialize an SGE queue set context info 814 * @qs: the queue set 815 * @id: the queue set id 816 * 817 * Initializes the TIDs and context ids for the queues of a queue set. 818 */ 819static void 820init_qset_cntxt(struct sge_qset *qs, u_int id) 821{ 822 823 qs->rspq.cntxt_id = id; 824 qs->fl[0].cntxt_id = 2 * id; 825 qs->fl[1].cntxt_id = 2 * id + 1; 826 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; 827 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; 828 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; 829 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; 830 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; 831} 832 833 834static void 835txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) 836{ 837 txq->in_use += ndesc; 838 /* 839 * XXX we don't handle stopping of queue 840 * presumably start handles this when we bump against the end 841 */ 842 txqs->gen = txq->gen; 843 txq->unacked += ndesc; 844 txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3); 845 txq->unacked &= 7; 846 txqs->pidx = txq->pidx; 847 txq->pidx += ndesc; 848 849 if (txq->pidx >= txq->size) { 850 txq->pidx -= txq->size; 851 txq->gen ^= 1; 852 } 853 854} 855 856/** 857 * calc_tx_descs - calculate the number of Tx descriptors for a packet 858 * @m: the packet mbufs 859 * @nsegs: the number of segments 860 * 
 *	Returns the number of Tx descriptors needed for the given Ethernet
 *	packet.  Ethernet packets require addition of WR and CPL headers.
 */
static __inline unsigned int
calc_tx_descs(const struct mbuf *m, int nsegs)
{
	unsigned int flits;

	/* small packets travel as immediate data in a single descriptor */
	if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
		return 1;

	/* the SGL plus two flits of WR/CPL header */
	flits = sgl_len(nsegs) + 2;
#ifdef TSO_SUPPORTED
	if (m->m_pkthdr.csum_flags & (CSUM_TSO))
		flits++;
#endif
	return flits_to_desc(flits);
}

/*
 * DMA-map the mbuf chain *m for transmit into segs/nsegs.  On EFBIG the
 * chain is defragmented once and the load retried.  On unrecoverable
 * failure the chain is freed and *m set to NULL; ENOMEM is returned with
 * the chain intact so the caller may retry later.  Returns 0 on success.
 */
static unsigned int
busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
    struct tx_sw_desc *stx, bus_dma_segment_t *segs, int *nsegs)
{
	struct mbuf *m0;
	int err, pktlen;

	m0 = *m;
	pktlen = m0->m_pkthdr.len;

	err = bus_dmamap_load_mvec_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0);
#ifdef DEBUG
	if (err) {
		int n = 0;
		struct mbuf *mtmp = m0;
		while(mtmp) {
			n++;
			mtmp = mtmp->m_next;
		}
		printf("map_mbufs: bus_dmamap_load_mbuf_sg failed with %d - pkthdr.len==%d nmbufs=%d\n",
		    err, m0->m_pkthdr.len, n);
	}
#endif
	if (err == EFBIG) {
		/* Too many segments, try to defrag */
		m0 = m_defrag(m0, M_NOWAIT);
		if (m0 == NULL) {
			m_freem(*m);
			*m = NULL;
			return (ENOBUFS);
		}
		*m = m0;
		/*
		 * NOTE(review): the retry uses the plain mbuf loader while the
		 * first attempt used the mvec variant -- confirm intentional.
		 */
		err = bus_dmamap_load_mbuf_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0);
	}

	/* transient shortage: leave the chain intact for a later retry */
	if (err == ENOMEM) {
		return (err);
	}

	if (err) {
		if (cxgb_debug)
			printf("map failure err=%d pktlen=%d\n", err, pktlen);
		m_freem_vec(m0);
		*m = NULL;
		return (err);
	}

	bus_dmamap_sync(txq->entry_tag, stx->map, BUS_DMASYNC_PREWRITE);
	stx->flags |= TX_SW_DESC_MAPPED;

	return (0);
}

/**
 *	make_sgl - populate a scatter/gather list for a packet
 *	@sgp: the SGL to populate
 *	@segs: the packet dma segments
 *	@nsegs: the number of segments
 *
 *	Generates a scatter/gather list for the buffers that make up a packet
 *	and returns the SGL size in 8-byte words.  The caller must size the SGL
 *	appropriately.
 */
static __inline void
make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
{
	int i, idx;

	/* each sg_ent holds two (len, addr) pairs; idx toggles between them */
	for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) {
		if (i && idx == 0)
			++sgp;

		sgp->len[idx] = htobe32(segs[i].ds_len);
		sgp->addr[idx] = htobe64(segs[i].ds_addr);
	}

	/* zero-terminate a half-filled final entry */
	if (idx)
		sgp->len[idx] = 0;
}

/**
 *	check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 *	@adap: the adapter
 *	@q: the Tx queue
 *
 *	Ring the doorbell if a Tx queue is asleep.  There is a natural race,
 *	where the HW is going to sleep just after we checked, however,
 *	then the interrupt handler will detect the outstanding TX packet
 *	and ring the doorbell for us.
 *
 *	When GTS is disabled we unconditionally ring the doorbell.
 */
static __inline void
check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
{
#if USE_GTS
	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
		set_bit(TXQ_LAST_PKT_DB, &q->flags);
#ifdef T3_TRACE
		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
			  q->cntxt_id);
#endif
		t3_write_reg(adap, A_SG_KDOORBELL,
			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	}
#else
	wmb();			/* write descriptors before telling HW */
	t3_write_reg(adap, A_SG_KDOORBELL,
		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
#endif
}

/*
 * Write the generation bit into the spare final flit of a descriptor.
 * Only needed when two generation bits are in use (SGE_NUM_GENBITS == 2).
 */
static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
#endif
}



/**
 *	write_wr_hdr_sgl - write a WR header and, optionally, SGL
 *	@ndesc: number of Tx descriptors spanned by the SGL
 *	@txd: first Tx descriptor to be written
 *	@txqs: txq state (generation and producer index)
 *	@txq: the SGE Tx queue
 *	@sgl: the SGL
 *	@flits: number of flits to the start of the SGL in the first descriptor
 *	@sgl_flits: the SGL size in flits
 *	@wr_hi: top 32 bits of WR header based on WR type (big endian)
 *	@wr_lo: low 32 bits of WR header based on WR type (big endian)
 *
 *	Write a work request header and an associated SGL.  If the SGL is
 *	small enough to fit into one Tx descriptor it has already been written
 *	and we just need to write the WR header.  Otherwise we distribute the
 *	SGL across the number of descriptors it spans.
 */

static void
write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
    const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
    unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
{

	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
	struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];

	if (__predict_true(ndesc == 1)) {
		/* single descriptor: the SGL is already in place, add the header */
		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi;
		/* the low word carries the generation bit: publish it last */
		wmb();
		wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
		    V_WR_GEN(txqs->gen)) | wr_lo;
		/* XXX gen? */
		wr_gen2(txd, txqs->gen);
	} else {
		unsigned int ogen = txqs->gen;
		const uint64_t *fp = (const uint64_t *)sgl;
		struct work_request_hdr *wp = wrp;

		/* first descriptor gets SOP but not EOP yet */
		wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi;

		/* spread the SGL flits across the remaining descriptors */
		while (sgl_flits) {
			unsigned int avail = WR_FLITS - flits;

			if (avail > sgl_flits)
				avail = sgl_flits;
			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
			sgl_flits -= avail;
			ndesc--;
			if (!sgl_flits)
				break;

			fp += avail;
			txd++;
			txsd++;
			if (++txqs->pidx == txq->size) {
				txqs->pidx = 0;
				txqs->gen ^= 1;
				txd = txq->desc;
				txsd = txq->sdesc;
			}

			/*
			 * when the head of the mbuf chain
			 * is freed all clusters will be freed
			 * with it
			 */
			txsd->m = NULL;
			wrp = (struct work_request_hdr *)txd;
			wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
			    V_WR_SGLSFLT(1)) | wr_hi;
			wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
				    sgl_flits + 1)) |
			    V_WR_GEN(txqs->gen)) | wr_lo;
			wr_gen2(txd, txqs->gen);
			flits = 1;
		}
		wrp->wr_hi |= htonl(F_WR_EOP);
		wmb();
		/* writing the first WR's low word last validates the whole chain */
		wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
		wr_gen2((struct tx_desc *)wp, ogen);
	}
}


/* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
#define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)

/*
 * t3_encap - encapsulate an outbound Ethernet packet into SGE Tx
 * descriptors and hand it to the hardware.  Small packets are copied
 * inline as immediate data; larger packets are DMA-mapped and described
 * by an SGL.  Returns 0 on success or an errno from the mapping step.
 */
int
t3_encap(struct port_info *p, struct mbuf **m)
{
	adapter_t *sc;
	struct mbuf *m0;
	struct sge_qset *qs;
	struct sge_txq *txq;
	struct tx_sw_desc *stx;
	struct txq_state txqs;
	unsigned int nsegs, ndesc, flits, cntrl, mlen;
	int err, tso_info = 0;

	struct work_request_hdr *wrp;
	struct tx_sw_desc *txsd;
	struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
	bus_dma_segment_t segs[TX_MAX_SEGS];
	uint32_t wr_hi, wr_lo, sgl_flits;

	struct tx_desc *txd;
	struct cpl_tx_pkt *cpl;

	DPRINTF("t3_encap ");
	m0 = *m;
	sc = p->adapter;
	qs = &sc->sge.qs[p->first_qset];
	txq = &qs->txq[TXQ_ETH];
	stx = &txq->sdesc[txq->pidx];
	txd = &txq->desc[txq->pidx];
	cpl = (struct cpl_tx_pkt *)txd;
	mlen = m0->m_pkthdr.len;
	cpl->len = htonl(mlen | 0x80000000);

	DPRINTF("mlen=%d\n", mlen);
	/*
	 * XXX handle checksum, TSO, and VLAN here
	 *
	 */
	cntrl = V_TXPKT_INTF(p->port);

	/*
	 * XXX need to add VLAN support for 6.x
	 */
#ifdef VLAN_SUPPORTED
	if (m0->m_flags & M_VLANTAG)
		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
	if (m0->m_pkthdr.csum_flags & (CSUM_TSO))
		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
#endif
	if (tso_info) {
		/* TSO path: build a CPL_TX_PKT_LSO header from the packet headers */
		int eth_type;
		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *) cpl;
		struct ip *ip;
		struct tcphdr *tcp;
		uint8_t *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack? */

		txd->flit[2] = 0;
		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
		hdr->cntrl = htonl(cntrl);

		/* make sure the Ethernet/IP/TCP headers are contiguous */
		if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
			pkthdr = &tmp[0];
			m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr);
		} else {
			pkthdr = mtod(m0, uint8_t *);
		}

		if (__predict_false(m0->m_flags & M_VLANTAG)) {
			eth_type = CPL_ETH_II_VLAN;
			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
			    ETHER_VLAN_ENCAP_LEN);
		} else {
			eth_type = CPL_ETH_II;
			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
		}
		tcp = (struct tcphdr *)((uint8_t *)ip +
		    sizeof(*ip));

		tso_info |= V_LSO_ETH_TYPE(eth_type) |
			    V_LSO_IPHDR_WORDS(ip->ip_hl) |
			    V_LSO_TCPHDR_WORDS(tcp->th_off);
		hdr->lso_info = htonl(tso_info);
		flits = 3;
	} else {
		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
		cpl->cntrl = htonl(cntrl);

		/* short packet: copy the payload inline as immediate data */
		if (mlen <= WR_LEN - sizeof(*cpl)) {
			txq_prod(txq, 1, &txqs);
			txq->sdesc[txqs.pidx].m = m0;
			m_set_priority(m0, txqs.pidx);

			if (m0->m_len == m0->m_pkthdr.len)
				memcpy(&txd->flit[2], mtod(m0, uint8_t *), mlen);
			else
				m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);

			flits = (mlen + 7) / 8 + 2;
			cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
					  V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
					  F_WR_SOP | F_WR_EOP | txqs.compl);
			/* the low word holds the generation bit: publish it last */
			wmb();
			cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));

			wr_gen2(txd, txqs.gen);
			check_ring_tx_db(sc, txq);
			return (0);
		}
		flits = 2;
	}

	/* large packet: DMA-map the chain and describe it with an SGL */
	wrp = (struct work_request_hdr *)txd;

	if ((err = busdma_map_mbufs(m, txq, stx, segs, &nsegs)) != 0) {
		return (err);
	}
	m0 = *m;
	ndesc = calc_tx_descs(m0, nsegs);

	/* a one-descriptor SGL is written straight into the descriptor */
	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
	make_sgl(sgp, segs, nsegs);

	sgl_flits = sgl_len(nsegs);

	DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
	txq_prod(txq, ndesc, &txqs);
	txsd = &txq->sdesc[txqs.pidx];
	wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
	wr_lo = htonl(V_WR_TID(txq->token));
	txsd->m = m0;
	m_set_priority(m0, txqs.pidx);

	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
	check_ring_tx_db(p->adapter, txq);

	return (0);
}


/**
 *	write_imm - write a packet into a Tx descriptor as immediate data
 *	@d: the Tx descriptor to write
 *	@m: the packet
 *	@len: the length of packet data to write as immediate data
 *	@gen: the generation bit value to write
 *
 *	Writes a packet as immediate data into a Tx descriptor.  The packet
 *	contains a work request at its beginning.  We must write the packet
 *	carefully so the SGE doesn't read accidentally before it's written in
 *	its entirety.
 */
static __inline void
write_imm(struct tx_desc *d, struct mbuf *m,
	  unsigned int len, unsigned int gen)
{
	struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
	struct work_request_hdr *to = (struct work_request_hdr *)d;

	/* copy the payload first, then publish the header */
	memcpy(&to[1], &from[1], len - sizeof(*from));
	to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
					V_WR_BCNTLFLT(len & 7));
	/* wr_lo carries the generation bit the SGE looks at: write it last */
	wmb();
	to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
					V_WR_LEN((len + 7) / 8));
	wr_gen2(d, gen);
	/* the data has been copied into the descriptor; drop the mbuf */
	m_freem(m);
}

/**
 *	check_desc_avail - check descriptor availability on a send queue
 *	@adap: the adapter
 *	@q: the TX queue
 *	@m: the packet needing the descriptors
 *	@ndesc: the number of Tx descriptors needed
 *	@qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 *
 *	Checks if the requested number of Tx descriptors is available on an
 *	SGE send queue.  If the queue is already suspended or not enough
 *	descriptors are available the packet is queued for later transmission.
 *	Must be called with the Tx queue locked.
 *
 *	Returns 0 if enough descriptors are available, 1 if there aren't
 *	enough descriptors and the packet has been queued, and 2 if the caller
 *	needs to retry because there weren't enough descriptors at the
 *	beginning of the call but some freed up in the mean time.
 */
static __inline int
check_desc_avail(adapter_t *adap, struct sge_txq *q,
		 struct mbuf *m, unsigned int ndesc,
		 unsigned int qid)
{
	/*
	 * XXX We currently only use this for checking the control queue
	 * the control queue is only used for binding qsets which happens
	 * at init time so we are guaranteed enough descriptors
	 */
	if (__predict_false(!mbufq_empty(&q->sendq))) {
addq_exit:	mbufq_tail(&q->sendq, m);
		return 1;
	}
	if (__predict_false(q->size - q->in_use < ndesc)) {

		struct sge_qset *qs = txq_to_qset(q, qid);

		/* mark stopped, then re-check to close the race with reclaim */
		setbit(&qs->txq_stopped, qid);
		smp_mb();

		if (should_restart_tx(q) &&
		    test_and_clear_bit(qid, &qs->txq_stopped))
			return 2;

		q->stops++;
		goto addq_exit;
	}
	return 0;
}


/**
 *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 *	@q: the SGE control Tx queue
 *
 *	This is a variant of reclaim_completed_tx() that is used for Tx queues
 *	that send only immediate data (presently just the control queues) and
 *	thus do not have any mbufs
 */
static __inline void
reclaim_completed_tx_imm(struct sge_txq *q)
{
	unsigned int reclaim = q->processed - q->cleaned;

	mtx_assert(&q->lock, MA_OWNED);

	q->in_use -= reclaim;
	q->cleaned += reclaim;
}

/* true if the whole packet fits in one descriptor as immediate data */
static __inline int
immediate(const struct mbuf *m)
{
	return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ;
}

/**
 *	ctrl_xmit - send a packet through an SGE control Tx queue
 *	@adap: the adapter
 *	@q: the control queue
 *	@m: the packet
 *
 *	Send a packet through an SGE control Tx queue.  Packets sent through
 *	a control queue must fit entirely as immediate data in a single Tx
 *	descriptor and have no page fragments.
 */
static int
ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
{
	int ret;
	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);

	/* non-immediate packets are a caller bug; drop them silently */
	if (__predict_false(!immediate(m))) {
		m_freem(m);
		return 0;
	}

	wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
	wrp->wr_lo = htonl(V_WR_TID(q->token));

	mtx_lock(&q->lock);
again:	reclaim_completed_tx_imm(q);

	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
	if (__predict_false(ret)) {
		if (ret == 1) {
			/* queued on the sendq for later transmission */
			mtx_unlock(&q->lock);
			return (-1);
		}
		/* ret == 2: descriptors freed up in the meantime, retry */
		goto again;
	}

	write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

	q->in_use++;
	if (++q->pidx >= q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	mtx_unlock(&q->lock);
	/* descriptors must be visible before the doorbell write */
	wmb();
	t3_write_reg(adap, A_SG_KDOORBELL,
		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	return (0);
}


/**
 *	restart_ctrlq - restart a suspended control queue
 *	@qs: the queue set containing the control queue
 *
 *	Resumes transmission on a suspended Tx control queue.
 */
static void
restart_ctrlq(void *data, int npending)
{
	struct mbuf *m;
	struct sge_qset *qs = (struct sge_qset *)data;
	struct sge_txq *q = &qs->txq[TXQ_CTRL];
	adapter_t *adap = qs->port->adapter;

	mtx_lock(&q->lock);
again:	reclaim_completed_tx_imm(q);

	/* drain the backlog while descriptors remain available */
	while (q->in_use < q->size &&
	       (m = mbufq_dequeue(&q->sendq)) != NULL) {

		write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

		if (++q->pidx >= q->size) {
			q->pidx = 0;
			q->gen ^= 1;
		}
		q->in_use++;
	}
	if (!mbufq_empty(&q->sendq)) {
		/* still backlogged: suspend again, re-check for the race */
		setbit(&qs->txq_stopped, TXQ_CTRL);
		smp_mb();

		if (should_restart_tx(q) &&
		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
			goto again;
		q->stops++;
	}
	mtx_unlock(&q->lock);
	t3_write_reg(adap, A_SG_KDOORBELL,
		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}


/*
 * Send a management message through control queue 0
 */
int
t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
{
	return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
}

/**
 *	free_qset - free the resources of an SGE queue set
 *	@sc: the controller owning the queue set
 *	@q: the queue set
 *
 *	Release the HW and SW resources associated with an SGE queue set, such
 *	as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 *	queue set must be quiesced prior to calling this.
1447 */ 1448static void 1449t3_free_qset(adapter_t *sc, struct sge_qset *q) 1450{ 1451 int i; 1452 1453 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 1454 if (q->fl[i].desc) { 1455 mtx_lock(&sc->sge.reg_lock); 1456 t3_sge_disable_fl(sc, q->fl[i].cntxt_id); 1457 mtx_unlock(&sc->sge.reg_lock); 1458 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map); 1459 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc, 1460 q->fl[i].desc_map); 1461 bus_dma_tag_destroy(q->fl[i].desc_tag); 1462 bus_dma_tag_destroy(q->fl[i].entry_tag); 1463 } 1464 if (q->fl[i].sdesc) { 1465 free_rx_bufs(sc, &q->fl[i]); 1466 free(q->fl[i].sdesc, M_DEVBUF); 1467 } 1468 } 1469
| 691} 692 693/* 694 * This is meant to be a catch-all function to keep sge state private 695 * to sge.c 696 * 697 */ 698int 699t3_sge_init_adapter(adapter_t *sc) 700{ 701 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE); 702 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 703 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc); 704 return (0); 705} 706 707int 708t3_sge_init_port(struct port_info *p) 709{ 710 TASK_INIT(&p->timer_reclaim_task, 0, sge_timer_reclaim, p); 711 return (0); 712} 713 714void 715t3_sge_deinit_sw(adapter_t *sc) 716{ 717 int i; 718 719 callout_drain(&sc->sge_timer_ch); 720 if (sc->tq) 721 taskqueue_drain(sc->tq, &sc->slow_intr_task); 722 for (i = 0; i < sc->params.nports; i++) 723 if (sc->port[i].tq != NULL) 724 taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task); 725} 726 727/** 728 * refill_rspq - replenish an SGE response queue 729 * @adapter: the adapter 730 * @q: the response queue to replenish 731 * @credits: how many new responses to make available 732 * 733 * Replenishes a response queue by making the supplied number of responses 734 * available to HW. 735 */ 736static __inline void 737refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) 738{ 739 740 /* mbufs are allocated on demand when a rspq entry is processed. 
*/ 741 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, 742 V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); 743} 744 745 746static void 747sge_timer_reclaim(void *arg, int ncount) 748{ 749 struct port_info *p = arg; 750 int i, nqsets = p->nqsets; 751 adapter_t *sc = p->adapter; 752 struct sge_qset *qs; 753 struct sge_txq *txq; 754 struct mtx *lock; 755 struct mbuf *m_vec[TX_CLEAN_MAX_DESC]; 756 int n, reclaimable; 757 758 for (i = 0; i < nqsets; i++) { 759 qs = &sc->sge.qs[i]; 760 txq = &qs->txq[TXQ_ETH]; 761 reclaimable = desc_reclaimable(txq); 762 if (reclaimable > 0) { 763 mtx_lock(&txq->lock); 764 n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec); 765 mtx_unlock(&txq->lock); 766 767 for (i = 0; i < n; i++) 768 m_freem_vec(m_vec[i]); 769 770 if (p->ifp->if_drv_flags & IFF_DRV_OACTIVE && 771 txq->size - txq->in_use >= TX_START_MAX_DESC) { 772 p->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 773 taskqueue_enqueue(p->tq, &p->start_task); 774 } 775 } 776 777 txq = &qs->txq[TXQ_OFLD]; 778 reclaimable = desc_reclaimable(txq); 779 if (reclaimable > 0) { 780 mtx_lock(&txq->lock); 781 n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec); 782 mtx_unlock(&txq->lock); 783 784 for (i = 0; i < n; i++) 785 m_freem_vec(m_vec[i]); 786 } 787 788 lock = (sc->flags & USING_MSIX) ? 
&qs->rspq.lock : 789 &sc->sge.qs[0].rspq.lock; 790 791 if (mtx_trylock(lock)) { 792 /* XXX currently assume that we are *NOT* polling */ 793 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); 794 795 if (qs->fl[0].credits < qs->fl[0].size - 16) 796 __refill_fl(sc, &qs->fl[0]); 797 if (qs->fl[1].credits < qs->fl[1].size - 16) 798 __refill_fl(sc, &qs->fl[1]); 799 800 if (status & (1 << qs->rspq.cntxt_id)) { 801 if (qs->rspq.credits) { 802 refill_rspq(sc, &qs->rspq, 1); 803 qs->rspq.credits--; 804 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 805 1 << qs->rspq.cntxt_id); 806 } 807 } 808 mtx_unlock(lock); 809 } 810 } 811} 812 813/** 814 * init_qset_cntxt - initialize an SGE queue set context info 815 * @qs: the queue set 816 * @id: the queue set id 817 * 818 * Initializes the TIDs and context ids for the queues of a queue set. 819 */ 820static void 821init_qset_cntxt(struct sge_qset *qs, u_int id) 822{ 823 824 qs->rspq.cntxt_id = id; 825 qs->fl[0].cntxt_id = 2 * id; 826 qs->fl[1].cntxt_id = 2 * id + 1; 827 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; 828 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; 829 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; 830 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; 831 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; 832} 833 834 835static void 836txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) 837{ 838 txq->in_use += ndesc; 839 /* 840 * XXX we don't handle stopping of queue 841 * presumably start handles this when we bump against the end 842 */ 843 txqs->gen = txq->gen; 844 txq->unacked += ndesc; 845 txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3); 846 txq->unacked &= 7; 847 txqs->pidx = txq->pidx; 848 txq->pidx += ndesc; 849 850 if (txq->pidx >= txq->size) { 851 txq->pidx -= txq->size; 852 txq->gen ^= 1; 853 } 854 855} 856 857/** 858 * calc_tx_descs - calculate the number of Tx descriptors for a packet 859 * @m: the packet mbufs 860 * @nsegs: the number of segments 861 * 
862 * Returns the number of Tx descriptors needed for the given Ethernet 863 * packet. Ethernet packets require addition of WR and CPL headers. 864 */ 865static __inline unsigned int 866calc_tx_descs(const struct mbuf *m, int nsegs) 867{ 868 unsigned int flits; 869 870 if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt)) 871 return 1; 872 873 flits = sgl_len(nsegs) + 2; 874#ifdef TSO_SUPPORTED 875 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) 876 flits++; 877#endif 878 return flits_to_desc(flits); 879} 880 881static unsigned int 882busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq, 883 struct tx_sw_desc *stx, bus_dma_segment_t *segs, int *nsegs) 884{ 885 struct mbuf *m0; 886 int err, pktlen; 887 888 m0 = *m; 889 pktlen = m0->m_pkthdr.len; 890 891 err = bus_dmamap_load_mvec_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0); 892#ifdef DEBUG 893 if (err) { 894 int n = 0; 895 struct mbuf *mtmp = m0; 896 while(mtmp) { 897 n++; 898 mtmp = mtmp->m_next; 899 } 900 printf("map_mbufs: bus_dmamap_load_mbuf_sg failed with %d - pkthdr.len==%d nmbufs=%d\n", 901 err, m0->m_pkthdr.len, n); 902 } 903#endif 904 if (err == EFBIG) { 905 /* Too many segments, try to defrag */ 906 m0 = m_defrag(m0, M_NOWAIT); 907 if (m0 == NULL) { 908 m_freem(*m); 909 *m = NULL; 910 return (ENOBUFS); 911 } 912 *m = m0; 913 err = bus_dmamap_load_mbuf_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0); 914 } 915 916 if (err == ENOMEM) { 917 return (err); 918 } 919 920 if (err) { 921 if (cxgb_debug) 922 printf("map failure err=%d pktlen=%d\n", err, pktlen); 923 m_freem_vec(m0); 924 *m = NULL; 925 return (err); 926 } 927 928 bus_dmamap_sync(txq->entry_tag, stx->map, BUS_DMASYNC_PREWRITE); 929 stx->flags |= TX_SW_DESC_MAPPED; 930 931 return (0); 932} 933 934/** 935 * make_sgl - populate a scatter/gather list for a packet 936 * @sgp: the SGL to populate 937 * @segs: the packet dma segments 938 * @nsegs: the number of segments 939 * 940 * Generates a scatter/gather list for the buffers that make up a packet 
941 * and returns the SGL size in 8-byte words. The caller must size the SGL 942 * appropriately. 943 */ 944static __inline void 945make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs) 946{ 947 int i, idx; 948 949 for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) { 950 if (i && idx == 0) 951 ++sgp; 952 953 sgp->len[idx] = htobe32(segs[i].ds_len); 954 sgp->addr[idx] = htobe64(segs[i].ds_addr); 955 } 956 957 if (idx) 958 sgp->len[idx] = 0; 959} 960 961/** 962 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell 963 * @adap: the adapter 964 * @q: the Tx queue 965 * 966 * Ring the doorbel if a Tx queue is asleep. There is a natural race, 967 * where the HW is going to sleep just after we checked, however, 968 * then the interrupt handler will detect the outstanding TX packet 969 * and ring the doorbell for us. 970 * 971 * When GTS is disabled we unconditionally ring the doorbell. 972 */ 973static __inline void 974check_ring_tx_db(adapter_t *adap, struct sge_txq *q) 975{ 976#if USE_GTS 977 clear_bit(TXQ_LAST_PKT_DB, &q->flags); 978 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) { 979 set_bit(TXQ_LAST_PKT_DB, &q->flags); 980#ifdef T3_TRACE 981 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d", 982 q->cntxt_id); 983#endif 984 t3_write_reg(adap, A_SG_KDOORBELL, 985 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 986 } 987#else 988 wmb(); /* write descriptors before telling HW */ 989 t3_write_reg(adap, A_SG_KDOORBELL, 990 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 991#endif 992} 993 994static __inline void 995wr_gen2(struct tx_desc *d, unsigned int gen) 996{ 997#if SGE_NUM_GENBITS == 2 998 d->flit[TX_DESC_FLITS - 1] = htobe64(gen); 999#endif 1000} 1001 1002 1003 1004/** 1005 * write_wr_hdr_sgl - write a WR header and, optionally, SGL 1006 * @ndesc: number of Tx descriptors spanned by the SGL 1007 * @txd: first Tx descriptor to be written 1008 * @txqs: txq state (generation and producer index) 1009 * @txq: the SGE Tx queue 1010 * @sgl: the SGL 
1011 * @flits: number of flits to the start of the SGL in the first descriptor 1012 * @sgl_flits: the SGL size in flits 1013 * @wr_hi: top 32 bits of WR header based on WR type (big endian) 1014 * @wr_lo: low 32 bits of WR header based on WR type (big endian) 1015 * 1016 * Write a work request header and an associated SGL. If the SGL is 1017 * small enough to fit into one Tx descriptor it has already been written 1018 * and we just need to write the WR header. Otherwise we distribute the 1019 * SGL across the number of descriptors it spans. 1020 */ 1021 1022static void 1023write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs, 1024 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits, 1025 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo) 1026{ 1027 1028 struct work_request_hdr *wrp = (struct work_request_hdr *)txd; 1029 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx]; 1030 1031 if (__predict_true(ndesc == 1)) { 1032 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1033 V_WR_SGLSFLT(flits)) | wr_hi; 1034 wmb(); 1035 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) | 1036 V_WR_GEN(txqs->gen)) | wr_lo; 1037 /* XXX gen? 
*/ 1038 wr_gen2(txd, txqs->gen); 1039 } else { 1040 unsigned int ogen = txqs->gen; 1041 const uint64_t *fp = (const uint64_t *)sgl; 1042 struct work_request_hdr *wp = wrp; 1043 1044 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | 1045 V_WR_SGLSFLT(flits)) | wr_hi; 1046 1047 while (sgl_flits) { 1048 unsigned int avail = WR_FLITS - flits; 1049 1050 if (avail > sgl_flits) 1051 avail = sgl_flits; 1052 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); 1053 sgl_flits -= avail; 1054 ndesc--; 1055 if (!sgl_flits) 1056 break; 1057 1058 fp += avail; 1059 txd++; 1060 txsd++; 1061 if (++txqs->pidx == txq->size) { 1062 txqs->pidx = 0; 1063 txqs->gen ^= 1; 1064 txd = txq->desc; 1065 txsd = txq->sdesc; 1066 } 1067 1068 /* 1069 * when the head of the mbuf chain 1070 * is freed all clusters will be freed 1071 * with it 1072 */ 1073 txsd->m = NULL; 1074 wrp = (struct work_request_hdr *)txd; 1075 wrp->wr_hi = htonl(V_WR_DATATYPE(1) | 1076 V_WR_SGLSFLT(1)) | wr_hi; 1077 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS, 1078 sgl_flits + 1)) | 1079 V_WR_GEN(txqs->gen)) | wr_lo; 1080 wr_gen2(txd, txqs->gen); 1081 flits = 1; 1082 } 1083 wrp->wr_hi |= htonl(F_WR_EOP); 1084 wmb(); 1085 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; 1086 wr_gen2((struct tx_desc *)wp, ogen); 1087 } 1088} 1089 1090 1091/* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */ 1092#define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20) 1093 1094int 1095t3_encap(struct port_info *p, struct mbuf **m) 1096{ 1097 adapter_t *sc; 1098 struct mbuf *m0; 1099 struct sge_qset *qs; 1100 struct sge_txq *txq; 1101 struct tx_sw_desc *stx; 1102 struct txq_state txqs; 1103 unsigned int nsegs, ndesc, flits, cntrl, mlen; 1104 int err, tso_info = 0; 1105 1106 struct work_request_hdr *wrp; 1107 struct tx_sw_desc *txsd; 1108 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; 1109 bus_dma_segment_t segs[TX_MAX_SEGS]; 1110 uint32_t wr_hi, wr_lo, sgl_flits; 1111 1112 struct tx_desc *txd; 1113 struct 
cpl_tx_pkt *cpl; 1114 1115 DPRINTF("t3_encap "); 1116 m0 = *m; 1117 sc = p->adapter; 1118 qs = &sc->sge.qs[p->first_qset]; 1119 txq = &qs->txq[TXQ_ETH]; 1120 stx = &txq->sdesc[txq->pidx]; 1121 txd = &txq->desc[txq->pidx]; 1122 cpl = (struct cpl_tx_pkt *)txd; 1123 mlen = m0->m_pkthdr.len; 1124 cpl->len = htonl(mlen | 0x80000000); 1125 1126 DPRINTF("mlen=%d\n", mlen); 1127 /* 1128 * XXX handle checksum, TSO, and VLAN here 1129 * 1130 */ 1131 cntrl = V_TXPKT_INTF(p->port); 1132 1133 /* 1134 * XXX need to add VLAN support for 6.x 1135 */ 1136#ifdef VLAN_SUPPORTED 1137 if (m0->m_flags & M_VLANTAG) 1138 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 1139 if (m0->m_pkthdr.csum_flags & (CSUM_TSO)) 1140 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); 1141#endif 1142 if (tso_info) { 1143 int eth_type; 1144 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *) cpl; 1145 struct ip *ip; 1146 struct tcphdr *tcp; 1147 uint8_t *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack? 
*/ 1148 1149 txd->flit[2] = 0; 1150 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); 1151 hdr->cntrl = htonl(cntrl); 1152 1153 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) { 1154 pkthdr = &tmp[0]; 1155 m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr); 1156 } else { 1157 pkthdr = mtod(m0, uint8_t *); 1158 } 1159 1160 if (__predict_false(m0->m_flags & M_VLANTAG)) { 1161 eth_type = CPL_ETH_II_VLAN; 1162 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN + 1163 ETHER_VLAN_ENCAP_LEN); 1164 } else { 1165 eth_type = CPL_ETH_II; 1166 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN); 1167 } 1168 tcp = (struct tcphdr *)((uint8_t *)ip + 1169 sizeof(*ip)); 1170 1171 tso_info |= V_LSO_ETH_TYPE(eth_type) | 1172 V_LSO_IPHDR_WORDS(ip->ip_hl) | 1173 V_LSO_TCPHDR_WORDS(tcp->th_off); 1174 hdr->lso_info = htonl(tso_info); 1175 flits = 3; 1176 } else { 1177 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1178 cpl->cntrl = htonl(cntrl); 1179 1180 if (mlen <= WR_LEN - sizeof(*cpl)) { 1181 txq_prod(txq, 1, &txqs); 1182 txq->sdesc[txqs.pidx].m = m0; 1183 m_set_priority(m0, txqs.pidx); 1184 1185 if (m0->m_len == m0->m_pkthdr.len) 1186 memcpy(&txd->flit[2], mtod(m0, uint8_t *), mlen); 1187 else 1188 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); 1189 1190 flits = (mlen + 7) / 8 + 2; 1191 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1192 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1193 F_WR_SOP | F_WR_EOP | txqs.compl); 1194 wmb(); 1195 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | 1196 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1197 1198 wr_gen2(txd, txqs.gen); 1199 check_ring_tx_db(sc, txq); 1200 return (0); 1201 } 1202 flits = 2; 1203 } 1204 1205 wrp = (struct work_request_hdr *)txd; 1206 1207 if ((err = busdma_map_mbufs(m, txq, stx, segs, &nsegs)) != 0) { 1208 return (err); 1209 } 1210 m0 = *m; 1211 ndesc = calc_tx_descs(m0, nsegs); 1212 1213 sgp = (ndesc == 1) ? 
(struct sg_ent *)&txd->flit[flits] : sgl; 1214 make_sgl(sgp, segs, nsegs); 1215 1216 sgl_flits = sgl_len(nsegs); 1217 1218 DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc); 1219 txq_prod(txq, ndesc, &txqs); 1220 txsd = &txq->sdesc[txqs.pidx]; 1221 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1222 wr_lo = htonl(V_WR_TID(txq->token)); 1223 txsd->m = m0; 1224 m_set_priority(m0, txqs.pidx); 1225 1226 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo); 1227 check_ring_tx_db(p->adapter, txq); 1228 1229 return (0); 1230} 1231 1232 1233/** 1234 * write_imm - write a packet into a Tx descriptor as immediate data 1235 * @d: the Tx descriptor to write 1236 * @m: the packet 1237 * @len: the length of packet data to write as immediate data 1238 * @gen: the generation bit value to write 1239 * 1240 * Writes a packet as immediate data into a Tx descriptor. The packet 1241 * contains a work request at its beginning. We must write the packet 1242 * carefully so the SGE doesn't read accidentally before it's written in 1243 * its entirety. 
1244 */ 1245static __inline void 1246write_imm(struct tx_desc *d, struct mbuf *m, 1247 unsigned int len, unsigned int gen) 1248{ 1249 struct work_request_hdr *from = mtod(m, struct work_request_hdr *); 1250 struct work_request_hdr *to = (struct work_request_hdr *)d; 1251 1252 memcpy(&to[1], &from[1], len - sizeof(*from)); 1253 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP | 1254 V_WR_BCNTLFLT(len & 7)); 1255 wmb(); 1256 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) | 1257 V_WR_LEN((len + 7) / 8)); 1258 wr_gen2(d, gen); 1259 m_freem(m); 1260} 1261 1262/** 1263 * check_desc_avail - check descriptor availability on a send queue 1264 * @adap: the adapter 1265 * @q: the TX queue 1266 * @m: the packet needing the descriptors 1267 * @ndesc: the number of Tx descriptors needed 1268 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) 1269 * 1270 * Checks if the requested number of Tx descriptors is available on an 1271 * SGE send queue. If the queue is already suspended or not enough 1272 * descriptors are available the packet is queued for later transmission. 1273 * Must be called with the Tx queue locked. 1274 * 1275 * Returns 0 if enough descriptors are available, 1 if there aren't 1276 * enough descriptors and the packet has been queued, and 2 if the caller 1277 * needs to retry because there weren't enough descriptors at the 1278 * beginning of the call but some freed up in the mean time. 
1279 */ 1280static __inline int 1281check_desc_avail(adapter_t *adap, struct sge_txq *q, 1282 struct mbuf *m, unsigned int ndesc, 1283 unsigned int qid) 1284{ 1285 /* 1286 * XXX We currently only use this for checking the control queue 1287 * the control queue is only used for binding qsets which happens 1288 * at init time so we are guaranteed enough descriptors 1289 */ 1290 if (__predict_false(!mbufq_empty(&q->sendq))) { 1291addq_exit: mbufq_tail(&q->sendq, m); 1292 return 1; 1293 } 1294 if (__predict_false(q->size - q->in_use < ndesc)) { 1295 1296 struct sge_qset *qs = txq_to_qset(q, qid); 1297 1298 setbit(&qs->txq_stopped, qid); 1299 smp_mb(); 1300 1301 if (should_restart_tx(q) && 1302 test_and_clear_bit(qid, &qs->txq_stopped)) 1303 return 2; 1304 1305 q->stops++; 1306 goto addq_exit; 1307 } 1308 return 0; 1309} 1310 1311 1312/** 1313 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs 1314 * @q: the SGE control Tx queue 1315 * 1316 * This is a variant of reclaim_completed_tx() that is used for Tx queues 1317 * that send only immediate data (presently just the control queues) and 1318 * thus do not have any mbufs 1319 */ 1320static __inline void 1321reclaim_completed_tx_imm(struct sge_txq *q) 1322{ 1323 unsigned int reclaim = q->processed - q->cleaned; 1324 1325 mtx_assert(&q->lock, MA_OWNED); 1326 1327 q->in_use -= reclaim; 1328 q->cleaned += reclaim; 1329} 1330 1331static __inline int 1332immediate(const struct mbuf *m) 1333{ 1334 return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ; 1335} 1336 1337/** 1338 * ctrl_xmit - send a packet through an SGE control Tx queue 1339 * @adap: the adapter 1340 * @q: the control queue 1341 * @m: the packet 1342 * 1343 * Send a packet through an SGE control Tx queue. Packets sent through 1344 * a control queue must fit entirely as immediate data in a single Tx 1345 * descriptor and have no page fragments. 
1346 */ 1347static int 1348ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m) 1349{ 1350 int ret; 1351 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *); 1352 1353 if (__predict_false(!immediate(m))) { 1354 m_freem(m); 1355 return 0; 1356 } 1357 1358 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP); 1359 wrp->wr_lo = htonl(V_WR_TID(q->token)); 1360 1361 mtx_lock(&q->lock); 1362again: reclaim_completed_tx_imm(q); 1363 1364 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL); 1365 if (__predict_false(ret)) { 1366 if (ret == 1) { 1367 mtx_unlock(&q->lock); 1368 return (-1); 1369 } 1370 goto again; 1371 } 1372 1373 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1374 1375 q->in_use++; 1376 if (++q->pidx >= q->size) { 1377 q->pidx = 0; 1378 q->gen ^= 1; 1379 } 1380 mtx_unlock(&q->lock); 1381 wmb(); 1382 t3_write_reg(adap, A_SG_KDOORBELL, 1383 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1384 return (0); 1385} 1386 1387 1388/** 1389 * restart_ctrlq - restart a suspended control queue 1390 * @qs: the queue set cotaining the control queue 1391 * 1392 * Resumes transmission on a suspended Tx control queue. 
1393 */ 1394static void 1395restart_ctrlq(void *data, int npending) 1396{ 1397 struct mbuf *m; 1398 struct sge_qset *qs = (struct sge_qset *)data; 1399 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1400 adapter_t *adap = qs->port->adapter; 1401 1402 mtx_lock(&q->lock); 1403again: reclaim_completed_tx_imm(q); 1404 1405 while (q->in_use < q->size && 1406 (m = mbufq_dequeue(&q->sendq)) != NULL) { 1407 1408 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1409 1410 if (++q->pidx >= q->size) { 1411 q->pidx = 0; 1412 q->gen ^= 1; 1413 } 1414 q->in_use++; 1415 } 1416 if (!mbufq_empty(&q->sendq)) { 1417 setbit(&qs->txq_stopped, TXQ_CTRL); 1418 smp_mb(); 1419 1420 if (should_restart_tx(q) && 1421 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) 1422 goto again; 1423 q->stops++; 1424 } 1425 mtx_unlock(&q->lock); 1426 t3_write_reg(adap, A_SG_KDOORBELL, 1427 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1428} 1429 1430 1431/* 1432 * Send a management message through control queue 0 1433 */ 1434int 1435t3_mgmt_tx(struct adapter *adap, struct mbuf *m) 1436{ 1437 return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m); 1438} 1439 1440/** 1441 * free_qset - free the resources of an SGE queue set 1442 * @sc: the controller owning the queue set 1443 * @q: the queue set 1444 * 1445 * Release the HW and SW resources associated with an SGE queue set, such 1446 * as HW contexts, packet buffers, and descriptor rings. Traffic to the 1447 * queue set must be quiesced prior to calling this. 
1448 */ 1449static void 1450t3_free_qset(adapter_t *sc, struct sge_qset *q) 1451{ 1452 int i; 1453 1454 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 1455 if (q->fl[i].desc) { 1456 mtx_lock(&sc->sge.reg_lock); 1457 t3_sge_disable_fl(sc, q->fl[i].cntxt_id); 1458 mtx_unlock(&sc->sge.reg_lock); 1459 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map); 1460 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc, 1461 q->fl[i].desc_map); 1462 bus_dma_tag_destroy(q->fl[i].desc_tag); 1463 bus_dma_tag_destroy(q->fl[i].entry_tag); 1464 } 1465 if (q->fl[i].sdesc) { 1466 free_rx_bufs(sc, &q->fl[i]); 1467 free(q->fl[i].sdesc, M_DEVBUF); 1468 } 1469 } 1470
|
1470 for (i = 0; i < SGE_TXQ_PER_SET; ++i) {
| 1471 for (i = 0; i < SGE_TXQ_PER_SET; i++) {
|
1471 if (q->txq[i].desc) { 1472 mtx_lock(&sc->sge.reg_lock); 1473 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0); 1474 mtx_unlock(&sc->sge.reg_lock); 1475 bus_dmamap_unload(q->txq[i].desc_tag, 1476 q->txq[i].desc_map); 1477 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc, 1478 q->txq[i].desc_map); 1479 bus_dma_tag_destroy(q->txq[i].desc_tag); 1480 bus_dma_tag_destroy(q->txq[i].entry_tag);
| 1472 if (q->txq[i].desc) { 1473 mtx_lock(&sc->sge.reg_lock); 1474 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0); 1475 mtx_unlock(&sc->sge.reg_lock); 1476 bus_dmamap_unload(q->txq[i].desc_tag, 1477 q->txq[i].desc_map); 1478 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc, 1479 q->txq[i].desc_map); 1480 bus_dma_tag_destroy(q->txq[i].desc_tag); 1481 bus_dma_tag_destroy(q->txq[i].entry_tag);
|
| 1482 MTX_DESTROY(&q->txq[i].lock);
|
1481 } 1482 if (q->txq[i].sdesc) { 1483 free(q->txq[i].sdesc, M_DEVBUF); 1484 }
| 1483 } 1484 if (q->txq[i].sdesc) { 1485 free(q->txq[i].sdesc, M_DEVBUF); 1486 }
|
1485 if (mtx_initialized(&q->txq[i].lock)) { 1486 mtx_destroy(&q->txq[i].lock); 1487 }
| |
1488 } 1489 1490 if (q->rspq.desc) { 1491 mtx_lock(&sc->sge.reg_lock); 1492 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id); 1493 mtx_unlock(&sc->sge.reg_lock); 1494 1495 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map); 1496 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc, 1497 q->rspq.desc_map); 1498 bus_dma_tag_destroy(q->rspq.desc_tag);
| 1487 } 1488 1489 if (q->rspq.desc) { 1490 mtx_lock(&sc->sge.reg_lock); 1491 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id); 1492 mtx_unlock(&sc->sge.reg_lock); 1493 1494 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map); 1495 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc, 1496 q->rspq.desc_map); 1497 bus_dma_tag_destroy(q->rspq.desc_tag);
|
| 1498 MTX_DESTROY(&q->rspq.lock);
|
1499 } 1500
| 1499 } 1500
|
1501 if (mtx_initialized(&q->rspq.lock)) 1502 mtx_destroy(&q->rspq.lock); 1503
| |
1504 bzero(q, sizeof(*q)); 1505} 1506 1507/** 1508 * t3_free_sge_resources - free SGE resources 1509 * @sc: the adapter softc 1510 * 1511 * Frees resources used by the SGE queue sets. 1512 */ 1513void 1514t3_free_sge_resources(adapter_t *sc) 1515{
| 1501 bzero(q, sizeof(*q)); 1502} 1503 1504/** 1505 * t3_free_sge_resources - free SGE resources 1506 * @sc: the adapter softc 1507 * 1508 * Frees resources used by the SGE queue sets. 1509 */ 1510void 1511t3_free_sge_resources(adapter_t *sc) 1512{
|
1516 int i;
| 1513 int i, nqsets;
|
1517
| 1514
|
1518 for (i = 0; i < SGE_QSETS; ++i)
| 1515 for (nqsets = i = 0; i < (sc)->params.nports; i++) 1516 nqsets += sc->port[i].nqsets; 1517 1518 for (i = 0; i < nqsets; ++i)
|
1519 t3_free_qset(sc, &sc->sge.qs[i]); 1520} 1521 1522/** 1523 * t3_sge_start - enable SGE 1524 * @sc: the controller softc 1525 * 1526 * Enables the SGE for DMAs. This is the last step in starting packet 1527 * transfers. 1528 */ 1529void 1530t3_sge_start(adapter_t *sc) 1531{ 1532 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE); 1533} 1534 1535/** 1536 * t3_sge_stop - disable SGE operation 1537 * @sc: the adapter 1538 * 1539 * Disables the DMA engine. This can be called in emeregencies (e.g., 1540 * from error interrupts) or from normal process context. In the latter 1541 * case it also disables any pending queue restart tasklets. Note that 1542 * if it is called in interrupt context it cannot disable the restart 1543 * tasklets as it cannot wait, however the tasklets will have no effect 1544 * since the doorbells are disabled and the driver will call this again 1545 * later from process context, at which time the tasklets will be stopped 1546 * if they are still running. 1547 */ 1548void 1549t3_sge_stop(adapter_t *sc) 1550{
| 1519 t3_free_qset(sc, &sc->sge.qs[i]); 1520} 1521 1522/** 1523 * t3_sge_start - enable SGE 1524 * @sc: the controller softc 1525 * 1526 * Enables the SGE for DMAs. This is the last step in starting packet 1527 * transfers. 1528 */ 1529void 1530t3_sge_start(adapter_t *sc) 1531{ 1532 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE); 1533} 1534 1535/** 1536 * t3_sge_stop - disable SGE operation 1537 * @sc: the adapter 1538 * 1539 * Disables the DMA engine. This can be called in emeregencies (e.g., 1540 * from error interrupts) or from normal process context. In the latter 1541 * case it also disables any pending queue restart tasklets. Note that 1542 * if it is called in interrupt context it cannot disable the restart 1543 * tasklets as it cannot wait, however the tasklets will have no effect 1544 * since the doorbells are disabled and the driver will call this again 1545 * later from process context, at which time the tasklets will be stopped 1546 * if they are still running. 1547 */ 1548void 1549t3_sge_stop(adapter_t *sc) 1550{
|
1551 int i;
| 1551 int i, nqsets; 1552
|
1552 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0); 1553 1554 if (sc->tq == NULL) 1555 return; 1556
| 1553 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0); 1554 1555 if (sc->tq == NULL) 1556 return; 1557
|
1557 for (i = 0; i < SGE_QSETS; ++i) {
| 1558 for (nqsets = i = 0; i < (sc)->params.nports; i++) 1559 nqsets += sc->port[i].nqsets; 1560 1561 for (i = 0; i < nqsets; ++i) {
|
1558 struct sge_qset *qs = &sc->sge.qs[i]; 1559 1560 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_tsk); 1561 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_tsk); 1562 } 1563} 1564 1565 1566/** 1567 * free_tx_desc - reclaims Tx descriptors and their buffers 1568 * @adapter: the adapter 1569 * @q: the Tx queue to reclaim descriptors from 1570 * @n: the number of descriptors to reclaim 1571 * 1572 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated 1573 * Tx buffers. Called with the Tx queue lock held. 1574 */ 1575int 1576free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec) 1577{ 1578 struct tx_sw_desc *d; 1579 unsigned int cidx = q->cidx; 1580 int nbufs = 0; 1581 1582#ifdef T3_TRACE 1583 T3_TRACE2(sc->tb[q->cntxt_id & 7], 1584 "reclaiming %u Tx descriptors at cidx %u", n, cidx); 1585#endif 1586 d = &q->sdesc[cidx]; 1587 1588 while (n-- > 0) { 1589 DPRINTF("cidx=%d d=%p\n", cidx, d); 1590 if (d->m) { 1591 if (d->flags & TX_SW_DESC_MAPPED) { 1592 bus_dmamap_unload(q->entry_tag, d->map); 1593 bus_dmamap_destroy(q->entry_tag, d->map); 1594 d->flags &= ~TX_SW_DESC_MAPPED; 1595 } 1596 if (m_get_priority(d->m) == cidx) { 1597 m_vec[nbufs] = d->m; 1598 d->m = NULL; 1599 nbufs++; 1600 } else { 1601 printf("pri=%d cidx=%d\n", (int)m_get_priority(d->m), cidx); 1602 } 1603 } 1604 ++d; 1605 if (++cidx == q->size) { 1606 cidx = 0; 1607 d = q->sdesc; 1608 } 1609 } 1610 q->cidx = cidx; 1611 1612 return (nbufs); 1613} 1614 1615/** 1616 * is_new_response - check if a response is newly written 1617 * @r: the response descriptor 1618 * @q: the response queue 1619 * 1620 * Returns true if a response descriptor contains a yet unprocessed 1621 * response. 
1622 */ 1623static __inline int 1624is_new_response(const struct rsp_desc *r, 1625 const struct sge_rspq *q) 1626{ 1627 return (r->intr_gen & F_RSPD_GEN2) == q->gen; 1628} 1629 1630#define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) 1631#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ 1632 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ 1633 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ 1634 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) 1635 1636/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */ 1637#define NOMEM_INTR_DELAY 2500 1638 1639/** 1640 * write_ofld_wr - write an offload work request 1641 * @adap: the adapter 1642 * @m: the packet to send 1643 * @q: the Tx queue 1644 * @pidx: index of the first Tx descriptor to write 1645 * @gen: the generation value to use 1646 * @ndesc: number of descriptors the packet will occupy 1647 * 1648 * Write an offload work request to send the supplied packet. The packet 1649 * data already carry the work request with most fields populated. 1650 */ 1651static void 1652write_ofld_wr(adapter_t *adap, struct mbuf *m, 1653 struct sge_txq *q, unsigned int pidx, 1654 unsigned int gen, unsigned int ndesc, 1655 bus_dma_segment_t *segs, unsigned int nsegs) 1656{ 1657 unsigned int sgl_flits, flits; 1658 struct work_request_hdr *from; 1659 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; 1660 struct tx_desc *d = &q->desc[pidx]; 1661 struct txq_state txqs; 1662 1663 if (immediate(m)) { 1664 q->sdesc[pidx].m = NULL; 1665 write_imm(d, m, m->m_len, gen); 1666 return; 1667 } 1668 1669 /* Only TX_DATA builds SGLs */ 1670 1671 from = mtod(m, struct work_request_hdr *); 1672 memcpy(&d->flit[1], &from[1], 1673 (uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *) - sizeof(*from)); 1674 1675 flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8; 1676 sgp = (ndesc == 1) ? 
(struct sg_ent *)&d->flit[flits] : sgl; 1677 1678 make_sgl(sgp, segs, nsegs); 1679 sgl_flits = sgl_len(nsegs); 1680 1681 txqs.gen = q->gen; 1682 txqs.pidx = q->pidx; 1683 txqs.compl = (q->unacked & 8) << (S_WR_COMPL - 3); 1684 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits, 1685 from->wr_hi, from->wr_lo); 1686} 1687 1688/** 1689 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet 1690 * @m: the packet 1691 * 1692 * Returns the number of Tx descriptors needed for the given offload 1693 * packet. These packets are already fully constructed. 1694 */ 1695static __inline unsigned int 1696calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs) 1697{ 1698 unsigned int flits, cnt = 0; 1699 1700 1701 if (m->m_len <= WR_LEN) 1702 return 1; /* packet fits as immediate data */ 1703 1704 if (m->m_flags & M_IOVEC) 1705 cnt = mtomv(m)->mv_count; 1706 1707 flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8; /* headers */ 1708 1709 return flits_to_desc(flits + sgl_len(cnt)); 1710} 1711 1712/** 1713 * ofld_xmit - send a packet through an offload queue 1714 * @adap: the adapter 1715 * @q: the Tx offload queue 1716 * @m: the packet 1717 * 1718 * Send an offload packet through an SGE offload queue. 
1719 */ 1720static int 1721ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m) 1722{ 1723 int ret; 1724 unsigned int pidx, gen, nsegs; 1725 unsigned int ndesc; 1726 struct mbuf *m_vec[TX_CLEAN_MAX_DESC]; 1727 bus_dma_segment_t segs[TX_MAX_SEGS]; 1728 int i, cleaned; 1729 struct tx_sw_desc *stx = &q->sdesc[q->pidx]; 1730 1731 mtx_lock(&q->lock); 1732 if ((ret = busdma_map_mbufs(&m, q, stx, segs, &nsegs)) != 0) { 1733 mtx_unlock(&q->lock); 1734 return (ret); 1735 } 1736 ndesc = calc_tx_descs_ofld(m, nsegs); 1737again: cleaned = reclaim_completed_tx(adap, q, TX_CLEAN_MAX_DESC, m_vec); 1738 1739 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD); 1740 if (__predict_false(ret)) { 1741 if (ret == 1) { 1742 m_set_priority(m, ndesc); /* save for restart */ 1743 mtx_unlock(&q->lock); 1744 return NET_XMIT_CN; 1745 } 1746 goto again; 1747 } 1748 1749 gen = q->gen; 1750 q->in_use += ndesc; 1751 pidx = q->pidx; 1752 q->pidx += ndesc; 1753 if (q->pidx >= q->size) { 1754 q->pidx -= q->size; 1755 q->gen ^= 1; 1756 } 1757#ifdef T3_TRACE 1758 T3_TRACE5(adap->tb[q->cntxt_id & 7], 1759 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u", 1760 ndesc, pidx, skb->len, skb->len - skb->data_len, 1761 skb_shinfo(skb)->nr_frags); 1762#endif 1763 mtx_unlock(&q->lock); 1764 1765 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 1766 check_ring_tx_db(adap, q); 1767 1768 for (i = 0; i < cleaned; i++) { 1769 m_freem_vec(m_vec[i]); 1770 } 1771 return NET_XMIT_SUCCESS; 1772} 1773 1774/** 1775 * restart_offloadq - restart a suspended offload queue 1776 * @qs: the queue set cotaining the offload queue 1777 * 1778 * Resumes transmission on a suspended Tx offload queue. 
1779 */ 1780static void 1781restart_offloadq(void *data, int npending) 1782{ 1783 1784 struct mbuf *m; 1785 struct sge_qset *qs = data; 1786 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 1787 adapter_t *adap = qs->port->adapter; 1788 struct mbuf *m_vec[TX_CLEAN_MAX_DESC]; 1789 bus_dma_segment_t segs[TX_MAX_SEGS]; 1790 int nsegs, i, cleaned; 1791 struct tx_sw_desc *stx = &q->sdesc[q->pidx]; 1792 1793 mtx_lock(&q->lock); 1794again: cleaned = reclaim_completed_tx(adap, q, TX_CLEAN_MAX_DESC, m_vec); 1795 1796 while ((m = mbufq_peek(&q->sendq)) != NULL) { 1797 unsigned int gen, pidx; 1798 unsigned int ndesc = m_get_priority(m); 1799 1800 if (__predict_false(q->size - q->in_use < ndesc)) { 1801 setbit(&qs->txq_stopped, TXQ_OFLD); 1802 smp_mb(); 1803 1804 if (should_restart_tx(q) && 1805 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) 1806 goto again; 1807 q->stops++; 1808 break; 1809 } 1810 1811 gen = q->gen; 1812 q->in_use += ndesc; 1813 pidx = q->pidx; 1814 q->pidx += ndesc; 1815 if (q->pidx >= q->size) { 1816 q->pidx -= q->size; 1817 q->gen ^= 1; 1818 } 1819 1820 (void)mbufq_dequeue(&q->sendq); 1821 busdma_map_mbufs(&m, q, stx, segs, &nsegs); 1822 mtx_unlock(&q->lock); 1823 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 1824 mtx_lock(&q->lock); 1825 } 1826 mtx_unlock(&q->lock); 1827 1828#if USE_GTS 1829 set_bit(TXQ_RUNNING, &q->flags); 1830 set_bit(TXQ_LAST_PKT_DB, &q->flags); 1831#endif 1832 t3_write_reg(adap, A_SG_KDOORBELL, 1833 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1834 1835 for (i = 0; i < cleaned; i++) { 1836 m_freem_vec(m_vec[i]); 1837 } 1838} 1839 1840/** 1841 * queue_set - return the queue set a packet should use 1842 * @m: the packet 1843 * 1844 * Maps a packet to the SGE queue set it should use. The desired queue 1845 * set is carried in bits 1-3 in the packet's priority. 
1846 */ 1847static __inline int 1848queue_set(const struct mbuf *m) 1849{ 1850 return m_get_priority(m) >> 1; 1851} 1852 1853/** 1854 * is_ctrl_pkt - return whether an offload packet is a control packet 1855 * @m: the packet 1856 * 1857 * Determines whether an offload packet should use an OFLD or a CTRL 1858 * Tx queue. This is indicated by bit 0 in the packet's priority. 1859 */ 1860static __inline int 1861is_ctrl_pkt(const struct mbuf *m) 1862{ 1863 return m_get_priority(m) & 1; 1864} 1865 1866/** 1867 * t3_offload_tx - send an offload packet 1868 * @tdev: the offload device to send to 1869 * @m: the packet 1870 * 1871 * Sends an offload packet. We use the packet priority to select the 1872 * appropriate Tx queue as follows: bit 0 indicates whether the packet 1873 * should be sent as regular or control, bits 1-3 select the queue set. 1874 */ 1875int 1876t3_offload_tx(struct toedev *tdev, struct mbuf *m) 1877{ 1878 adapter_t *adap = tdev2adap(tdev); 1879 struct sge_qset *qs = &adap->sge.qs[queue_set(m)]; 1880 1881 if (__predict_false(is_ctrl_pkt(m))) 1882 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m); 1883 1884 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m); 1885} 1886 1887/** 1888 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts 1889 * @tdev: the offload device that will be receiving the packets 1890 * @q: the SGE response queue that assembled the bundle 1891 * @m: the partial bundle 1892 * @n: the number of packets in the bundle 1893 * 1894 * Delivers a (partial) bundle of Rx offload packets to an offload device. 
1895 */ 1896static __inline void 1897deliver_partial_bundle(struct toedev *tdev, 1898 struct sge_rspq *q, 1899 struct mbuf *mbufs[], int n) 1900{ 1901 if (n) { 1902 q->offload_bundles++; 1903 cxgb_ofld_recv(tdev, mbufs, n); 1904 } 1905} 1906 1907static __inline int 1908rx_offload(struct toedev *tdev, struct sge_rspq *rq, 1909 struct mbuf *m, struct mbuf *rx_gather[], 1910 unsigned int gather_idx) 1911{ 1912 rq->offload_pkts++; 1913 m->m_pkthdr.header = mtod(m, void *); 1914 1915 rx_gather[gather_idx++] = m; 1916 if (gather_idx == RX_BUNDLE_SIZE) { 1917 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE); 1918 gather_idx = 0; 1919 rq->offload_bundles++; 1920 } 1921 return (gather_idx); 1922} 1923 1924static void 1925restart_tx(struct sge_qset *qs) 1926{ 1927 struct adapter *sc = qs->port->adapter; 1928 1929 if (isset(&qs->txq_stopped, TXQ_OFLD) && 1930 should_restart_tx(&qs->txq[TXQ_OFLD]) && 1931 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { 1932 qs->txq[TXQ_OFLD].restarts++; 1933 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_tsk); 1934 } 1935 if (isset(&qs->txq_stopped, TXQ_CTRL) && 1936 should_restart_tx(&qs->txq[TXQ_CTRL]) && 1937 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { 1938 qs->txq[TXQ_CTRL].restarts++; 1939 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_tsk); 1940 } 1941} 1942 1943/** 1944 * t3_sge_alloc_qset - initialize an SGE queue set 1945 * @sc: the controller softc 1946 * @id: the queue set id 1947 * @nports: how many Ethernet ports will be using this queue set 1948 * @irq_vec_idx: the IRQ vector index for response queue interrupts 1949 * @p: configuration parameters for this queue set 1950 * @ntxq: number of Tx queues for the queue set 1951 * @pi: port info for queue set 1952 * 1953 * Allocate resources and initialize an SGE queue set. A queue set 1954 * comprises a response queue, two Rx free-buffer queues, and up to 3 1955 * Tx queues. 
The Tx queues are assigned roles in the order Ethernet 1956 * queue, offload queue, and control queue. 1957 */ 1958int 1959t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 1960 const struct qset_params *p, int ntxq, struct port_info *pi) 1961{ 1962 struct sge_qset *q = &sc->sge.qs[id]; 1963 int i, ret = 0; 1964 1965 init_qset_cntxt(q, id); 1966 1967 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 1968 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 1969 &q->fl[0].desc, &q->fl[0].sdesc, 1970 &q->fl[0].desc_tag, &q->fl[0].desc_map, 1971 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 1972 printf("error %d from alloc ring fl0\n", ret); 1973 goto err; 1974 } 1975 1976 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 1977 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 1978 &q->fl[1].desc, &q->fl[1].sdesc, 1979 &q->fl[1].desc_tag, &q->fl[1].desc_map, 1980 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 1981 printf("error %d from alloc ring fl1\n", ret); 1982 goto err; 1983 } 1984 1985 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 1986 &q->rspq.phys_addr, &q->rspq.desc, NULL, 1987 &q->rspq.desc_tag, &q->rspq.desc_map, 1988 NULL, NULL)) != 0) { 1989 printf("error %d from alloc ring rspq\n", ret); 1990 goto err; 1991 } 1992 1993 for (i = 0; i < ntxq; ++i) { 1994 /* 1995 * The control queue always uses immediate data so does not 1996 * need to keep track of any mbufs. 1997 * XXX Placeholder for future TOE support. 1998 */ 1999 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc); 2000 2001 if ((ret = alloc_ring(sc, p->txq_size[i], 2002 sizeof(struct tx_desc), sz, 2003 &q->txq[i].phys_addr, &q->txq[i].desc, 2004 &q->txq[i].sdesc, &q->txq[i].desc_tag, 2005 &q->txq[i].desc_map, 2006 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 2007 printf("error %d from alloc ring tx %i\n", ret, i); 2008 goto err; 2009 } 2010 mbufq_init(&q->txq[i].sendq); 2011 q->txq[i].gen = 1; 2012 q->txq[i].size = p->txq_size[i];
| 1562 struct sge_qset *qs = &sc->sge.qs[i]; 1563 1564 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_tsk); 1565 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_tsk); 1566 } 1567} 1568 1569 1570/** 1571 * free_tx_desc - reclaims Tx descriptors and their buffers 1572 * @adapter: the adapter 1573 * @q: the Tx queue to reclaim descriptors from 1574 * @n: the number of descriptors to reclaim 1575 * 1576 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated 1577 * Tx buffers. Called with the Tx queue lock held. 1578 */ 1579int 1580free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec) 1581{ 1582 struct tx_sw_desc *d; 1583 unsigned int cidx = q->cidx; 1584 int nbufs = 0; 1585 1586#ifdef T3_TRACE 1587 T3_TRACE2(sc->tb[q->cntxt_id & 7], 1588 "reclaiming %u Tx descriptors at cidx %u", n, cidx); 1589#endif 1590 d = &q->sdesc[cidx]; 1591 1592 while (n-- > 0) { 1593 DPRINTF("cidx=%d d=%p\n", cidx, d); 1594 if (d->m) { 1595 if (d->flags & TX_SW_DESC_MAPPED) { 1596 bus_dmamap_unload(q->entry_tag, d->map); 1597 bus_dmamap_destroy(q->entry_tag, d->map); 1598 d->flags &= ~TX_SW_DESC_MAPPED; 1599 } 1600 if (m_get_priority(d->m) == cidx) { 1601 m_vec[nbufs] = d->m; 1602 d->m = NULL; 1603 nbufs++; 1604 } else { 1605 printf("pri=%d cidx=%d\n", (int)m_get_priority(d->m), cidx); 1606 } 1607 } 1608 ++d; 1609 if (++cidx == q->size) { 1610 cidx = 0; 1611 d = q->sdesc; 1612 } 1613 } 1614 q->cidx = cidx; 1615 1616 return (nbufs); 1617} 1618 1619/** 1620 * is_new_response - check if a response is newly written 1621 * @r: the response descriptor 1622 * @q: the response queue 1623 * 1624 * Returns true if a response descriptor contains a yet unprocessed 1625 * response. 
1626 */ 1627static __inline int 1628is_new_response(const struct rsp_desc *r, 1629 const struct sge_rspq *q) 1630{ 1631 return (r->intr_gen & F_RSPD_GEN2) == q->gen; 1632} 1633 1634#define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) 1635#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ 1636 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ 1637 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ 1638 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) 1639 1640/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */ 1641#define NOMEM_INTR_DELAY 2500 1642 1643/** 1644 * write_ofld_wr - write an offload work request 1645 * @adap: the adapter 1646 * @m: the packet to send 1647 * @q: the Tx queue 1648 * @pidx: index of the first Tx descriptor to write 1649 * @gen: the generation value to use 1650 * @ndesc: number of descriptors the packet will occupy 1651 * 1652 * Write an offload work request to send the supplied packet. The packet 1653 * data already carry the work request with most fields populated. 1654 */ 1655static void 1656write_ofld_wr(adapter_t *adap, struct mbuf *m, 1657 struct sge_txq *q, unsigned int pidx, 1658 unsigned int gen, unsigned int ndesc, 1659 bus_dma_segment_t *segs, unsigned int nsegs) 1660{ 1661 unsigned int sgl_flits, flits; 1662 struct work_request_hdr *from; 1663 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; 1664 struct tx_desc *d = &q->desc[pidx]; 1665 struct txq_state txqs; 1666 1667 if (immediate(m)) { 1668 q->sdesc[pidx].m = NULL; 1669 write_imm(d, m, m->m_len, gen); 1670 return; 1671 } 1672 1673 /* Only TX_DATA builds SGLs */ 1674 1675 from = mtod(m, struct work_request_hdr *); 1676 memcpy(&d->flit[1], &from[1], 1677 (uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *) - sizeof(*from)); 1678 1679 flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8; 1680 sgp = (ndesc == 1) ? 
(struct sg_ent *)&d->flit[flits] : sgl; 1681 1682 make_sgl(sgp, segs, nsegs); 1683 sgl_flits = sgl_len(nsegs); 1684 1685 txqs.gen = q->gen; 1686 txqs.pidx = q->pidx; 1687 txqs.compl = (q->unacked & 8) << (S_WR_COMPL - 3); 1688 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits, 1689 from->wr_hi, from->wr_lo); 1690} 1691 1692/** 1693 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet 1694 * @m: the packet 1695 * 1696 * Returns the number of Tx descriptors needed for the given offload 1697 * packet. These packets are already fully constructed. 1698 */ 1699static __inline unsigned int 1700calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs) 1701{ 1702 unsigned int flits, cnt = 0; 1703 1704 1705 if (m->m_len <= WR_LEN) 1706 return 1; /* packet fits as immediate data */ 1707 1708 if (m->m_flags & M_IOVEC) 1709 cnt = mtomv(m)->mv_count; 1710 1711 flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8; /* headers */ 1712 1713 return flits_to_desc(flits + sgl_len(cnt)); 1714} 1715 1716/** 1717 * ofld_xmit - send a packet through an offload queue 1718 * @adap: the adapter 1719 * @q: the Tx offload queue 1720 * @m: the packet 1721 * 1722 * Send an offload packet through an SGE offload queue. 
 1723 */ 1724static int 1725ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m) 1726{ 1727	int ret; 1728	unsigned int pidx, gen, nsegs; 1729	unsigned int ndesc; 1730	struct mbuf *m_vec[TX_CLEAN_MAX_DESC]; 1731	bus_dma_segment_t segs[TX_MAX_SEGS]; 1732	int i, cleaned; 1733	struct tx_sw_desc *stx = &q->sdesc[q->pidx]; 1734	 1735	mtx_lock(&q->lock); 1736	if ((ret = busdma_map_mbufs(&m, q, stx, segs, &nsegs)) != 0) { 1737		mtx_unlock(&q->lock); 1738		return (ret); 1739	} 1740	ndesc = calc_tx_descs_ofld(m, nsegs); 1741again:	cleaned = reclaim_completed_tx(adap, q, TX_CLEAN_MAX_DESC, m_vec); 1742 1743	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD); 1744	if (__predict_false(ret)) { 1745		if (ret == 1) { 1746			m_set_priority(m, ndesc);	/* save for restart */ 1747			mtx_unlock(&q->lock); 1748			return NET_XMIT_CN; 1749		} 1750		goto again; 1751	} 1752 1753	gen = q->gen; 1754	q->in_use += ndesc; 1755	pidx = q->pidx; 1756	q->pidx += ndesc; 1757	if (q->pidx >= q->size) { 1758		q->pidx -= q->size; 1759		q->gen ^= 1; 1760	} 1761#ifdef T3_TRACE 1762	T3_TRACE5(adap->tb[q->cntxt_id & 7], 1763		  "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u", 1764		  ndesc, pidx, skb->len, skb->len - skb->data_len, 1765		  skb_shinfo(skb)->nr_frags); 1766#endif 1767	mtx_unlock(&q->lock); 1768 1769	write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 1770	check_ring_tx_db(adap, q); 1771 1772	for (i = 0; i < cleaned; i++) { 1773		m_freem_vec(m_vec[i]); 1774	} 1775	return NET_XMIT_SUCCESS; 1776} 1777 1778/** 1779 *	restart_offloadq - restart a suspended offload queue 1780 *	@qs: the queue set containing the offload queue 1781 *	 1782 *	Resumes transmission on a suspended Tx offload queue. 
1783 */ 1784static void 1785restart_offloadq(void *data, int npending) 1786{ 1787 1788 struct mbuf *m; 1789 struct sge_qset *qs = data; 1790 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 1791 adapter_t *adap = qs->port->adapter; 1792 struct mbuf *m_vec[TX_CLEAN_MAX_DESC]; 1793 bus_dma_segment_t segs[TX_MAX_SEGS]; 1794 int nsegs, i, cleaned; 1795 struct tx_sw_desc *stx = &q->sdesc[q->pidx]; 1796 1797 mtx_lock(&q->lock); 1798again: cleaned = reclaim_completed_tx(adap, q, TX_CLEAN_MAX_DESC, m_vec); 1799 1800 while ((m = mbufq_peek(&q->sendq)) != NULL) { 1801 unsigned int gen, pidx; 1802 unsigned int ndesc = m_get_priority(m); 1803 1804 if (__predict_false(q->size - q->in_use < ndesc)) { 1805 setbit(&qs->txq_stopped, TXQ_OFLD); 1806 smp_mb(); 1807 1808 if (should_restart_tx(q) && 1809 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) 1810 goto again; 1811 q->stops++; 1812 break; 1813 } 1814 1815 gen = q->gen; 1816 q->in_use += ndesc; 1817 pidx = q->pidx; 1818 q->pidx += ndesc; 1819 if (q->pidx >= q->size) { 1820 q->pidx -= q->size; 1821 q->gen ^= 1; 1822 } 1823 1824 (void)mbufq_dequeue(&q->sendq); 1825 busdma_map_mbufs(&m, q, stx, segs, &nsegs); 1826 mtx_unlock(&q->lock); 1827 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 1828 mtx_lock(&q->lock); 1829 } 1830 mtx_unlock(&q->lock); 1831 1832#if USE_GTS 1833 set_bit(TXQ_RUNNING, &q->flags); 1834 set_bit(TXQ_LAST_PKT_DB, &q->flags); 1835#endif 1836 t3_write_reg(adap, A_SG_KDOORBELL, 1837 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1838 1839 for (i = 0; i < cleaned; i++) { 1840 m_freem_vec(m_vec[i]); 1841 } 1842} 1843 1844/** 1845 * queue_set - return the queue set a packet should use 1846 * @m: the packet 1847 * 1848 * Maps a packet to the SGE queue set it should use. The desired queue 1849 * set is carried in bits 1-3 in the packet's priority. 
1850 */ 1851static __inline int 1852queue_set(const struct mbuf *m) 1853{ 1854 return m_get_priority(m) >> 1; 1855} 1856 1857/** 1858 * is_ctrl_pkt - return whether an offload packet is a control packet 1859 * @m: the packet 1860 * 1861 * Determines whether an offload packet should use an OFLD or a CTRL 1862 * Tx queue. This is indicated by bit 0 in the packet's priority. 1863 */ 1864static __inline int 1865is_ctrl_pkt(const struct mbuf *m) 1866{ 1867 return m_get_priority(m) & 1; 1868} 1869 1870/** 1871 * t3_offload_tx - send an offload packet 1872 * @tdev: the offload device to send to 1873 * @m: the packet 1874 * 1875 * Sends an offload packet. We use the packet priority to select the 1876 * appropriate Tx queue as follows: bit 0 indicates whether the packet 1877 * should be sent as regular or control, bits 1-3 select the queue set. 1878 */ 1879int 1880t3_offload_tx(struct toedev *tdev, struct mbuf *m) 1881{ 1882 adapter_t *adap = tdev2adap(tdev); 1883 struct sge_qset *qs = &adap->sge.qs[queue_set(m)]; 1884 1885 if (__predict_false(is_ctrl_pkt(m))) 1886 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m); 1887 1888 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m); 1889} 1890 1891/** 1892 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts 1893 * @tdev: the offload device that will be receiving the packets 1894 * @q: the SGE response queue that assembled the bundle 1895 * @m: the partial bundle 1896 * @n: the number of packets in the bundle 1897 * 1898 * Delivers a (partial) bundle of Rx offload packets to an offload device. 
1899 */ 1900static __inline void 1901deliver_partial_bundle(struct toedev *tdev, 1902 struct sge_rspq *q, 1903 struct mbuf *mbufs[], int n) 1904{ 1905 if (n) { 1906 q->offload_bundles++; 1907 cxgb_ofld_recv(tdev, mbufs, n); 1908 } 1909} 1910 1911static __inline int 1912rx_offload(struct toedev *tdev, struct sge_rspq *rq, 1913 struct mbuf *m, struct mbuf *rx_gather[], 1914 unsigned int gather_idx) 1915{ 1916 rq->offload_pkts++; 1917 m->m_pkthdr.header = mtod(m, void *); 1918 1919 rx_gather[gather_idx++] = m; 1920 if (gather_idx == RX_BUNDLE_SIZE) { 1921 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE); 1922 gather_idx = 0; 1923 rq->offload_bundles++; 1924 } 1925 return (gather_idx); 1926} 1927 1928static void 1929restart_tx(struct sge_qset *qs) 1930{ 1931 struct adapter *sc = qs->port->adapter; 1932 1933 if (isset(&qs->txq_stopped, TXQ_OFLD) && 1934 should_restart_tx(&qs->txq[TXQ_OFLD]) && 1935 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { 1936 qs->txq[TXQ_OFLD].restarts++; 1937 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_tsk); 1938 } 1939 if (isset(&qs->txq_stopped, TXQ_CTRL) && 1940 should_restart_tx(&qs->txq[TXQ_CTRL]) && 1941 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { 1942 qs->txq[TXQ_CTRL].restarts++; 1943 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_tsk); 1944 } 1945} 1946 1947/** 1948 * t3_sge_alloc_qset - initialize an SGE queue set 1949 * @sc: the controller softc 1950 * @id: the queue set id 1951 * @nports: how many Ethernet ports will be using this queue set 1952 * @irq_vec_idx: the IRQ vector index for response queue interrupts 1953 * @p: configuration parameters for this queue set 1954 * @ntxq: number of Tx queues for the queue set 1955 * @pi: port info for queue set 1956 * 1957 * Allocate resources and initialize an SGE queue set. A queue set 1958 * comprises a response queue, two Rx free-buffer queues, and up to 3 1959 * Tx queues. 
The Tx queues are assigned roles in the order Ethernet 1960 * queue, offload queue, and control queue. 1961 */ 1962int 1963t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 1964 const struct qset_params *p, int ntxq, struct port_info *pi) 1965{ 1966 struct sge_qset *q = &sc->sge.qs[id]; 1967 int i, ret = 0; 1968 1969 init_qset_cntxt(q, id); 1970 1971 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 1972 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 1973 &q->fl[0].desc, &q->fl[0].sdesc, 1974 &q->fl[0].desc_tag, &q->fl[0].desc_map, 1975 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 1976 printf("error %d from alloc ring fl0\n", ret); 1977 goto err; 1978 } 1979 1980 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 1981 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 1982 &q->fl[1].desc, &q->fl[1].sdesc, 1983 &q->fl[1].desc_tag, &q->fl[1].desc_map, 1984 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 1985 printf("error %d from alloc ring fl1\n", ret); 1986 goto err; 1987 } 1988 1989 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 1990 &q->rspq.phys_addr, &q->rspq.desc, NULL, 1991 &q->rspq.desc_tag, &q->rspq.desc_map, 1992 NULL, NULL)) != 0) { 1993 printf("error %d from alloc ring rspq\n", ret); 1994 goto err; 1995 } 1996 1997 for (i = 0; i < ntxq; ++i) { 1998 /* 1999 * The control queue always uses immediate data so does not 2000 * need to keep track of any mbufs. 2001 * XXX Placeholder for future TOE support. 2002 */ 2003 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc); 2004 2005 if ((ret = alloc_ring(sc, p->txq_size[i], 2006 sizeof(struct tx_desc), sz, 2007 &q->txq[i].phys_addr, &q->txq[i].desc, 2008 &q->txq[i].sdesc, &q->txq[i].desc_tag, 2009 &q->txq[i].desc_map, 2010 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 2011 printf("error %d from alloc ring tx %i\n", ret, i); 2012 goto err; 2013 } 2014 mbufq_init(&q->txq[i].sendq); 2015 q->txq[i].gen = 1; 2016 q->txq[i].size = p->txq_size[i];
|
2013 mtx_init(&q->txq[i].lock, "t3 txq lock", NULL, MTX_DEF);
| 2017 snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d", 2018 device_get_unit(sc->dev), irq_vec_idx, i); 2019 MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF);
|
2014 } 2015 2016 TASK_INIT(&q->txq[TXQ_OFLD].qresume_tsk, 0, restart_offloadq, q); 2017 TASK_INIT(&q->txq[TXQ_CTRL].qresume_tsk, 0, restart_ctrlq, q); 2018 2019 q->fl[0].gen = q->fl[1].gen = 1; 2020 q->fl[0].size = p->fl_size; 2021 q->fl[1].size = p->jumbo_size; 2022 2023 q->rspq.gen = 1; 2024 q->rspq.size = p->rspq_size;
| 2020 } 2021 2022 TASK_INIT(&q->txq[TXQ_OFLD].qresume_tsk, 0, restart_offloadq, q); 2023 TASK_INIT(&q->txq[TXQ_CTRL].qresume_tsk, 0, restart_ctrlq, q); 2024 2025 q->fl[0].gen = q->fl[1].gen = 1; 2026 q->fl[0].size = p->fl_size; 2027 q->fl[1].size = p->jumbo_size; 2028 2029 q->rspq.gen = 1; 2030 q->rspq.size = p->rspq_size;
|
2025 mtx_init(&q->rspq.lock, "t3 rspq lock", NULL, MTX_DEF); 2026
| 2031
|
2027 q->txq[TXQ_ETH].stop_thres = nports * 2028 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 2029 2030 q->fl[0].buf_size = MCLBYTES; 2031 q->fl[0].zone = zone_clust; 2032 q->fl[0].type = EXT_CLUSTER; 2033 q->fl[1].buf_size = MJUMPAGESIZE; 2034 q->fl[1].zone = zone_jumbop; 2035 q->fl[1].type = EXT_JUMBOP; 2036 2037 q->lro.enabled = lro_default; 2038 2039 mtx_lock(&sc->sge.reg_lock); 2040 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 2041 q->rspq.phys_addr, q->rspq.size, 2042 q->fl[0].buf_size, 1, 0); 2043 if (ret) { 2044 printf("error %d from t3_sge_init_rspcntxt\n", ret); 2045 goto err_unlock; 2046 } 2047 2048 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2049 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 2050 q->fl[i].phys_addr, q->fl[i].size, 2051 q->fl[i].buf_size, p->cong_thres, 1, 2052 0); 2053 if (ret) { 2054 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 2055 goto err_unlock; 2056 } 2057 } 2058 2059 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 2060 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 2061 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 2062 1, 0); 2063 if (ret) { 2064 printf("error %d from t3_sge_init_ecntxt\n", ret); 2065 goto err_unlock; 2066 } 2067 2068 if (ntxq > 1) { 2069 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 2070 USE_GTS, SGE_CNTXT_OFLD, id, 2071 q->txq[TXQ_OFLD].phys_addr, 2072 q->txq[TXQ_OFLD].size, 0, 1, 0); 2073 if (ret) { 2074 printf("error %d from t3_sge_init_ecntxt\n", ret); 2075 goto err_unlock; 2076 } 2077 } 2078 2079 if (ntxq > 2) { 2080 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 2081 SGE_CNTXT_CTRL, id, 2082 q->txq[TXQ_CTRL].phys_addr, 2083 q->txq[TXQ_CTRL].size, 2084 q->txq[TXQ_CTRL].token, 1, 0); 2085 if (ret) { 2086 printf("error %d from t3_sge_init_ecntxt\n", ret); 2087 goto err_unlock; 2088 } 2089 } 2090
| 2032 q->txq[TXQ_ETH].stop_thres = nports * 2033 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 2034 2035 q->fl[0].buf_size = MCLBYTES; 2036 q->fl[0].zone = zone_clust; 2037 q->fl[0].type = EXT_CLUSTER; 2038 q->fl[1].buf_size = MJUMPAGESIZE; 2039 q->fl[1].zone = zone_jumbop; 2040 q->fl[1].type = EXT_JUMBOP; 2041 2042 q->lro.enabled = lro_default; 2043 2044 mtx_lock(&sc->sge.reg_lock); 2045 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 2046 q->rspq.phys_addr, q->rspq.size, 2047 q->fl[0].buf_size, 1, 0); 2048 if (ret) { 2049 printf("error %d from t3_sge_init_rspcntxt\n", ret); 2050 goto err_unlock; 2051 } 2052 2053 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2054 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 2055 q->fl[i].phys_addr, q->fl[i].size, 2056 q->fl[i].buf_size, p->cong_thres, 1, 2057 0); 2058 if (ret) { 2059 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 2060 goto err_unlock; 2061 } 2062 } 2063 2064 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 2065 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 2066 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 2067 1, 0); 2068 if (ret) { 2069 printf("error %d from t3_sge_init_ecntxt\n", ret); 2070 goto err_unlock; 2071 } 2072 2073 if (ntxq > 1) { 2074 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 2075 USE_GTS, SGE_CNTXT_OFLD, id, 2076 q->txq[TXQ_OFLD].phys_addr, 2077 q->txq[TXQ_OFLD].size, 0, 1, 0); 2078 if (ret) { 2079 printf("error %d from t3_sge_init_ecntxt\n", ret); 2080 goto err_unlock; 2081 } 2082 } 2083 2084 if (ntxq > 2) { 2085 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 2086 SGE_CNTXT_CTRL, id, 2087 q->txq[TXQ_CTRL].phys_addr, 2088 q->txq[TXQ_CTRL].size, 2089 q->txq[TXQ_CTRL].token, 1, 0); 2090 if (ret) { 2091 printf("error %d from t3_sge_init_ecntxt\n", ret); 2092 goto err_unlock; 2093 } 2094 } 2095
|
| 2096 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d", 2097 device_get_unit(sc->dev), irq_vec_idx); 2098 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF); 2099
|
2091 mtx_unlock(&sc->sge.reg_lock); 2092 t3_update_qset_coalesce(q, p); 2093 q->port = pi; 2094 2095 refill_fl(sc, &q->fl[0], q->fl[0].size); 2096 refill_fl(sc, &q->fl[1], q->fl[1].size); 2097 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 2098 2099 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 2100 V_NEWTIMER(q->rspq.holdoff_tmr)); 2101 2102 return (0); 2103 2104err_unlock: 2105 mtx_unlock(&sc->sge.reg_lock); 2106err: 2107 t3_free_qset(sc, q); 2108 2109 return (ret); 2110} 2111 2112void 2113t3_rx_eth(struct port_info *pi, struct sge_rspq *rq, struct mbuf *m, int ethpad) 2114{ 2115 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); 2116 struct ifnet *ifp = pi->ifp; 2117 2118 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff); 2119 if (&pi->adapter->port[cpl->iff] != pi) 2120 panic("bad port index %d m->m_data=%p\n", cpl->iff, mtod(m, uint8_t *)); 2121 2122 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment && 2123 cpl->csum_valid && cpl->csum == 0xffff) { 2124 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); 2125 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; 2126 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR); 2127 m->m_pkthdr.csum_data = 0xffff; 2128 } 2129 /* 2130 * XXX need to add VLAN support for 6.x 2131 */ 2132#ifdef VLAN_SUPPORTED 2133 if (__predict_false(cpl->vlan_valid)) { 2134 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 2135 m->m_flags |= M_VLANTAG; 2136 } 2137#endif 2138 2139 m->m_pkthdr.rcvif = ifp; 2140 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad; 2141 m_explode(m); 2142 /* 2143 * adjust after conversion to mbuf chain 2144 */ 2145 m_adj(m, sizeof(*cpl) + ethpad); 2146 2147 (*ifp->if_input)(ifp, m); 2148} 2149 2150/** 2151 * get_packet - return the next ingress packet buffer from a free list 2152 * @adap: the adapter that received the packet 2153 * @drop_thres: # of remaining buffers before we start dropping 
packets 2154 * @qs: the qset that the SGE free list holding the packet belongs to 2155 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain 2156 * @r: response descriptor 2157 * 2158 * Get the next packet from a free list and complete setup of the 2159 * sk_buff. If the packet is small we make a copy and recycle the 2160 * original buffer, otherwise we use the original buffer itself. If a 2161 * positive drop threshold is supplied packets are dropped and their 2162 * buffers recycled if (a) the number of remaining buffers is under the 2163 * threshold and the packet is too big to copy, or (b) the packet should 2164 * be copied but there is no memory for the copy. 2165 */ 2166static int 2167get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 2168 struct mbuf *m, struct rsp_desc *r) 2169{ 2170 2171 unsigned int len_cq = ntohl(r->len_cq); 2172 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0]; 2173 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; 2174 uint32_t len = G_RSPD_LEN(len_cq); 2175 uint32_t flags = ntohl(r->flags); 2176 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 2177 void *cl; 2178 int ret = 0; 2179 2180 prefetch(sd->cl); 2181 2182 fl->credits--; 2183 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2184 2185 if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) { 2186 cl = mtod(m, void *); 2187 memcpy(cl, sd->cl, len); 2188 recycle_rx_buf(adap, fl, fl->cidx); 2189 } else { 2190 cl = sd->cl; 2191 bus_dmamap_unload(fl->entry_tag, sd->map); 2192 } 2193 switch(sopeop) { 2194 case RSPQ_SOP_EOP: 2195 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m)); 2196 if (cl == sd->cl) 2197 m_cljset(m, cl, fl->type); 2198 m->m_len = m->m_pkthdr.len = len; 2199 ret = 1; 2200 goto done; 2201 break; 2202 case RSPQ_NSOP_NEOP: 2203 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m)); 2204 ret = 0; 2205 break; 2206 case RSPQ_SOP: 2207 DBG(DBG_RX, ("get_packet: SOP m %p\n", m)); 2208 
m_iovinit(m); 2209 ret = 0; 2210 break; 2211 case RSPQ_EOP: 2212 DBG(DBG_RX, ("get_packet: EOP m %p\n", m)); 2213 ret = 1; 2214 break; 2215 } 2216 m_iovappend(m, cl, fl->buf_size, len, 0); 2217 2218done: 2219 if (++fl->cidx == fl->size) 2220 fl->cidx = 0; 2221 2222 return (ret); 2223} 2224 2225/** 2226 * handle_rsp_cntrl_info - handles control information in a response 2227 * @qs: the queue set corresponding to the response 2228 * @flags: the response control flags 2229 * 2230 * Handles the control information of an SGE response, such as GTS 2231 * indications and completion credits for the queue set's Tx queues. 2232 * HW coalesces credits, we don't do any extra SW coalescing. 2233 */ 2234static __inline void 2235handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 2236{ 2237 unsigned int credits; 2238 2239#if USE_GTS 2240 if (flags & F_RSPD_TXQ0_GTS) 2241 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 2242#endif 2243 credits = G_RSPD_TXQ0_CR(flags); 2244 if (credits) { 2245 qs->txq[TXQ_ETH].processed += credits; 2246 if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC) 2247 taskqueue_enqueue(qs->port->adapter->tq, 2248 &qs->port->timer_reclaim_task); 2249 } 2250 2251 credits = G_RSPD_TXQ2_CR(flags); 2252 if (credits) 2253 qs->txq[TXQ_CTRL].processed += credits; 2254 2255# if USE_GTS 2256 if (flags & F_RSPD_TXQ1_GTS) 2257 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 2258# endif 2259 credits = G_RSPD_TXQ1_CR(flags); 2260 if (credits) 2261 qs->txq[TXQ_OFLD].processed += credits; 2262} 2263 2264static void 2265check_ring_db(adapter_t *adap, struct sge_qset *qs, 2266 unsigned int sleeping) 2267{ 2268 ; 2269} 2270 2271/** 2272 * process_responses - process responses from an SGE response queue 2273 * @adap: the adapter 2274 * @qs: the queue set to which the response queue belongs 2275 * @budget: how many responses can be processed in this round 2276 * 2277 * Process responses from an SGE response queue up to the supplied budget. 
2278 * Responses include received packets as well as credits and other events 2279 * for the queues that belong to the response queue's queue set. 2280 * A negative budget is effectively unlimited. 2281 * 2282 * Additionally choose the interrupt holdoff time for the next interrupt 2283 * on this queue. If the system is under memory shortage use a fairly 2284 * long delay to help recovery. 2285 */ 2286static int 2287process_responses(adapter_t *adap, struct sge_qset *qs, int budget) 2288{ 2289 struct sge_rspq *rspq = &qs->rspq; 2290 struct rsp_desc *r = &rspq->desc[rspq->cidx]; 2291 int budget_left = budget; 2292 unsigned int sleeping = 0; 2293 int lro = qs->lro.enabled; 2294 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE]; 2295 int ngathered = 0; 2296#ifdef DEBUG 2297 static int last_holdoff = 0; 2298 if (rspq->holdoff_tmr != last_holdoff) { 2299 printf("next_holdoff=%d\n", rspq->holdoff_tmr); 2300 last_holdoff = rspq->holdoff_tmr; 2301 } 2302#endif 2303 rspq->next_holdoff = rspq->holdoff_tmr; 2304 2305 while (__predict_true(budget_left && is_new_response(r, rspq))) { 2306 int eth, eop = 0, ethpad = 0; 2307 uint32_t flags = ntohl(r->flags); 2308 uint32_t rss_csum = *(const uint32_t *)r; 2309 uint32_t rss_hash = r->rss_hdr.rss_hash_val; 2310 2311 eth = (r->rss_hdr.opcode == CPL_RX_PKT); 2312 2313 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { 2314 /* XXX */ 2315 printf("async notification\n"); 2316 2317 } else if (flags & F_RSPD_IMM_DATA_VALID) { 2318 struct mbuf *m = NULL; 2319 if (cxgb_debug) 2320 printf("IMM DATA VALID\n"); 2321 if (rspq->m == NULL) 2322 rspq->m = m_gethdr(M_NOWAIT, MT_DATA); 2323 else 2324 m = m_gethdr(M_NOWAIT, MT_DATA); 2325 2326 if (rspq->m == NULL || m == NULL) { 2327 rspq->next_holdoff = NOMEM_INTR_DELAY; 2328 budget_left--; 2329 break; 2330 } 2331 get_imm_packet(adap, r, rspq->m, m); 2332 eop = 1; 2333 rspq->imm_data++; 2334 } else if (r->len_cq) { 2335 int drop_thresh = eth ? 
SGE_RX_DROP_THRES : 0; 2336 2337 if (rspq->m == NULL) 2338 rspq->m = m_gethdr(M_NOWAIT, MT_DATA); 2339 if (rspq->m == NULL) { 2340 log(LOG_WARNING, "failed to get mbuf for packet\n"); 2341 break; 2342 } 2343 2344 ethpad = 2; 2345 eop = get_packet(adap, drop_thresh, qs, rspq->m, r); 2346 } else { 2347 DPRINTF("pure response\n"); 2348 rspq->pure_rsps++; 2349 } 2350 2351 if (flags & RSPD_CTRL_MASK) { 2352 sleeping |= flags & RSPD_GTS_MASK; 2353 handle_rsp_cntrl_info(qs, flags); 2354 } 2355 2356 r++; 2357 if (__predict_false(++rspq->cidx == rspq->size)) { 2358 rspq->cidx = 0; 2359 rspq->gen ^= 1; 2360 r = rspq->desc; 2361 } 2362 2363 prefetch(r); 2364 if (++rspq->credits >= (rspq->size / 4)) { 2365 refill_rspq(adap, rspq, rspq->credits); 2366 rspq->credits = 0; 2367 } 2368 2369 if (eop) { 2370 prefetch(mtod(rspq->m, uint8_t *)); 2371 prefetch(mtod(rspq->m, uint8_t *) + L1_CACHE_BYTES); 2372 2373 if (eth) { 2374 t3_rx_eth_lro(adap, rspq, rspq->m, ethpad, 2375 rss_hash, rss_csum, lro); 2376 2377 rspq->m = NULL; 2378 } else { 2379 rspq->m->m_pkthdr.csum_data = rss_csum; 2380 /* 2381 * XXX size mismatch 2382 */ 2383 m_set_priority(rspq->m, rss_hash); 2384 2385 ngathered = rx_offload(&adap->tdev, rspq, rspq->m, 2386 offload_mbufs, ngathered); 2387 } 2388#ifdef notyet 2389 taskqueue_enqueue(adap->tq, &adap->timer_reclaim_task); 2390#else 2391 __refill_fl(adap, &qs->fl[0]); 2392 __refill_fl(adap, &qs->fl[1]); 2393#endif 2394 } 2395 --budget_left; 2396 } 2397 2398 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered); 2399 t3_lro_flush(adap, qs, &qs->lro); 2400 2401 if (sleeping) 2402 check_ring_db(adap, qs, sleeping); 2403 2404 smp_mb(); /* commit Tx queue processed updates */ 2405 if (__predict_false(qs->txq_stopped != 0)) 2406 restart_tx(qs); 2407 2408 budget -= budget_left; 2409 return (budget); 2410} 2411 2412/* 2413 * A helper function that processes responses and issues GTS. 
 */
static __inline int
process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
{
	int work;
	static int last_holdoff = 0;

	/* Unlimited budget (-1): drain the queue completely. */
	work = process_responses(adap, rspq_to_qset(rq), -1);

	if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
		printf("next_holdoff=%d\n", rq->next_holdoff);
		last_holdoff = rq->next_holdoff;
	}

	/* GTS write: new timer value and new consumer index in one register. */
	t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
	    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
	return work;
}


/*
 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
 * Handles data events from SGE response queues as well as error and other
 * async events as they all use the same interrupt pin. We use one SGE
 * response queue per port in this mode and protect all response queues with
 * queue 0's lock.
 */
void
t3b_intr(void *data)
{
	uint32_t map;
	adapter_t *adap = data;
	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
	struct sge_rspq *q1 = &adap->sge.qs[1].rspq;

	t3_write_reg(adap, A_PL_CLI, 0);
	map = t3_read_reg(adap, A_SG_DATA_INTR);

	/* Not our interrupt (shared line). */
	if (!map)
		return;

	if (__predict_false(map & F_ERRINTR))
		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);

	/* Both queues are protected by queue 0's lock in INTx mode. */
	mtx_lock(&q0->lock);

	if (__predict_true(map & 1))
		process_responses_gts(adap, q0);

	if (map & 2)
		process_responses_gts(adap, q1);

	mtx_unlock(&q0->lock);
}

/*
 * The MSI interrupt handler. This needs to handle data events from SGE
 * response queues as well as error and other async events as they all use
 * the same MSI vector. We use one SGE response queue per port in this mode
 * and protect all response queues with queue 0's lock.
2474 */ 2475void 2476t3_intr_msi(void *data) 2477{ 2478 adapter_t *adap = data; 2479 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 2480 struct sge_rspq *q1 = &adap->sge.qs[1].rspq; 2481 int new_packets = 0; 2482 2483 mtx_lock(&q0->lock); 2484 if (process_responses_gts(adap, q0)) { 2485 new_packets = 1; 2486 } 2487 2488 if (adap->params.nports == 2 && 2489 process_responses_gts(adap, q1)) { 2490 new_packets = 1; 2491 } 2492 2493 mtx_unlock(&q0->lock); 2494 if (new_packets == 0) 2495 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 2496} 2497 2498void 2499t3_intr_msix(void *data) 2500{ 2501 struct sge_qset *qs = data; 2502 adapter_t *adap = qs->port->adapter; 2503 struct sge_rspq *rspq = &qs->rspq; 2504 2505 mtx_lock(&rspq->lock); 2506 if (process_responses_gts(adap, rspq) == 0) 2507 rspq->unhandled_irqs++; 2508 mtx_unlock(&rspq->lock); 2509} 2510 2511/* 2512 * broken by recent mbuf changes 2513 */ 2514static int 2515t3_lro_enable(SYSCTL_HANDLER_ARGS) 2516{ 2517 adapter_t *sc; 2518 int i, j, enabled, err, nqsets = 0; 2519 2520#ifndef LRO_WORKING 2521 return (0); 2522#endif 2523 2524 sc = arg1; 2525 enabled = sc->sge.qs[0].lro.enabled; 2526 err = sysctl_handle_int(oidp, &enabled, arg2, req); 2527 2528 if (err != 0) 2529 return (err); 2530 if (enabled == sc->sge.qs[0].lro.enabled) 2531 return (0); 2532 2533 for (i = 0; i < sc->params.nports; i++) 2534 for (j = 0; j < sc->port[i].nqsets; j++) 2535 nqsets++; 2536 2537 for (i = 0; i < nqsets; i++) 2538 sc->sge.qs[i].lro.enabled = enabled; 2539 2540 return (0); 2541} 2542 2543static int 2544t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS) 2545{ 2546 adapter_t *sc = arg1; 2547 struct qset_params *qsp = &sc->params.sge.qset[0]; 2548 int coalesce_nsecs; 2549 struct sge_qset *qs; 2550 int i, j, err, nqsets = 0; 2551 struct mtx *lock; 2552 2553 coalesce_nsecs = qsp->coalesce_nsecs; 2554 err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req); 2555 2556 if (err != 0) { 2557 return (err); 2558 } 2559 if (coalesce_nsecs == 
qsp->coalesce_nsecs) 2560 return (0); 2561 2562 for (i = 0; i < sc->params.nports; i++) 2563 for (j = 0; j < sc->port[i].nqsets; j++) 2564 nqsets++; 2565 2566 coalesce_nsecs = max(100, coalesce_nsecs); 2567 2568 for (i = 0; i < nqsets; i++) { 2569 qs = &sc->sge.qs[i]; 2570 qsp = &sc->params.sge.qset[i]; 2571 qsp->coalesce_nsecs = coalesce_nsecs; 2572 2573 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 2574 &sc->sge.qs[0].rspq.lock; 2575 2576 mtx_lock(lock); 2577 t3_update_qset_coalesce(qs, qsp); 2578 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | 2579 V_NEWTIMER(qs->rspq.holdoff_tmr)); 2580 mtx_unlock(lock); 2581 } 2582 2583 return (0); 2584} 2585 2586 2587void 2588t3_add_sysctls(adapter_t *sc) 2589{ 2590 struct sysctl_ctx_list *ctx; 2591 struct sysctl_oid_list *children; 2592 2593 ctx = device_get_sysctl_ctx(sc->dev); 2594 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 2595 2596 /* random information */ 2597 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 2598 "firmware_version", 2599 CTLFLAG_RD, &sc->fw_version, 2600 0, "firmware version"); 2601 2602 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 2603 "enable_lro", 2604 CTLTYPE_INT|CTLFLAG_RW, sc, 2605 0, t3_lro_enable, 2606 "I", "enable large receive offload"); 2607 2608 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 2609 "intr_coal", 2610 CTLTYPE_INT|CTLFLAG_RW, sc, 2611 0, t3_set_coalesce_nsecs, 2612 "I", "interrupt coalescing timer (ns)"); 2613 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 2614 "enable_debug", 2615 CTLFLAG_RW, &cxgb_debug, 2616 0, "enable verbose debugging output"); 2617 2618 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 2619 "collapse_free", 2620 CTLFLAG_RD, &collapse_free, 2621 0, "frees during collapse"); 2622 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 2623 "mb_free_vec_free", 2624 CTLFLAG_RD, &mb_free_vec_free, 2625 0, "frees during mb_free_vec"); 2626 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 2627 "collapse_mbufs", 2628 CTLFLAG_RW, &collapse_mbufs, 2629 0, "collapse mbuf chains into iovecs"); 
2630} 2631 2632/** 2633 * t3_get_desc - dump an SGE descriptor for debugging purposes 2634 * @qs: the queue set 2635 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx) 2636 * @idx: the descriptor index in the queue 2637 * @data: where to dump the descriptor contents 2638 * 2639 * Dumps the contents of a HW descriptor of an SGE queue. Returns the 2640 * size of the descriptor. 2641 */ 2642int 2643t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, 2644 unsigned char *data) 2645{ 2646 if (qnum >= 6) 2647 return (EINVAL); 2648 2649 if (qnum < 3) { 2650 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size) 2651 return -EINVAL; 2652 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc)); 2653 return sizeof(struct tx_desc); 2654 } 2655 2656 if (qnum == 3) { 2657 if (!qs->rspq.desc || idx >= qs->rspq.size) 2658 return (EINVAL); 2659 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc)); 2660 return sizeof(struct rsp_desc); 2661 } 2662 2663 qnum -= 4; 2664 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size) 2665 return (EINVAL); 2666 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc)); 2667 return sizeof(struct rx_desc); 2668}
| 2100 mtx_unlock(&sc->sge.reg_lock); 2101 t3_update_qset_coalesce(q, p); 2102 q->port = pi; 2103 2104 refill_fl(sc, &q->fl[0], q->fl[0].size); 2105 refill_fl(sc, &q->fl[1], q->fl[1].size); 2106 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 2107 2108 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 2109 V_NEWTIMER(q->rspq.holdoff_tmr)); 2110 2111 return (0); 2112 2113err_unlock: 2114 mtx_unlock(&sc->sge.reg_lock); 2115err: 2116 t3_free_qset(sc, q); 2117 2118 return (ret); 2119} 2120 2121void 2122t3_rx_eth(struct port_info *pi, struct sge_rspq *rq, struct mbuf *m, int ethpad) 2123{ 2124 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); 2125 struct ifnet *ifp = pi->ifp; 2126 2127 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff); 2128 if (&pi->adapter->port[cpl->iff] != pi) 2129 panic("bad port index %d m->m_data=%p\n", cpl->iff, mtod(m, uint8_t *)); 2130 2131 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment && 2132 cpl->csum_valid && cpl->csum == 0xffff) { 2133 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); 2134 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; 2135 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR); 2136 m->m_pkthdr.csum_data = 0xffff; 2137 } 2138 /* 2139 * XXX need to add VLAN support for 6.x 2140 */ 2141#ifdef VLAN_SUPPORTED 2142 if (__predict_false(cpl->vlan_valid)) { 2143 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 2144 m->m_flags |= M_VLANTAG; 2145 } 2146#endif 2147 2148 m->m_pkthdr.rcvif = ifp; 2149 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad; 2150 m_explode(m); 2151 /* 2152 * adjust after conversion to mbuf chain 2153 */ 2154 m_adj(m, sizeof(*cpl) + ethpad); 2155 2156 (*ifp->if_input)(ifp, m); 2157} 2158 2159/** 2160 * get_packet - return the next ingress packet buffer from a free list 2161 * @adap: the adapter that received the packet 2162 * @drop_thres: # of remaining buffers before we start 
dropping packets 2163 * @qs: the qset that the SGE free list holding the packet belongs to 2164 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain 2165 * @r: response descriptor 2166 * 2167 * Get the next packet from a free list and complete setup of the 2168 * sk_buff. If the packet is small we make a copy and recycle the 2169 * original buffer, otherwise we use the original buffer itself. If a 2170 * positive drop threshold is supplied packets are dropped and their 2171 * buffers recycled if (a) the number of remaining buffers is under the 2172 * threshold and the packet is too big to copy, or (b) the packet should 2173 * be copied but there is no memory for the copy. 2174 */ 2175static int 2176get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 2177 struct mbuf *m, struct rsp_desc *r) 2178{ 2179 2180 unsigned int len_cq = ntohl(r->len_cq); 2181 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0]; 2182 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; 2183 uint32_t len = G_RSPD_LEN(len_cq); 2184 uint32_t flags = ntohl(r->flags); 2185 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 2186 void *cl; 2187 int ret = 0; 2188 2189 prefetch(sd->cl); 2190 2191 fl->credits--; 2192 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2193 2194 if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) { 2195 cl = mtod(m, void *); 2196 memcpy(cl, sd->cl, len); 2197 recycle_rx_buf(adap, fl, fl->cidx); 2198 } else { 2199 cl = sd->cl; 2200 bus_dmamap_unload(fl->entry_tag, sd->map); 2201 } 2202 switch(sopeop) { 2203 case RSPQ_SOP_EOP: 2204 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m)); 2205 if (cl == sd->cl) 2206 m_cljset(m, cl, fl->type); 2207 m->m_len = m->m_pkthdr.len = len; 2208 ret = 1; 2209 goto done; 2210 break; 2211 case RSPQ_NSOP_NEOP: 2212 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m)); 2213 ret = 0; 2214 break; 2215 case RSPQ_SOP: 2216 DBG(DBG_RX, ("get_packet: SOP m %p\n", m)); 
2217 m_iovinit(m); 2218 ret = 0; 2219 break; 2220 case RSPQ_EOP: 2221 DBG(DBG_RX, ("get_packet: EOP m %p\n", m)); 2222 ret = 1; 2223 break; 2224 } 2225 m_iovappend(m, cl, fl->buf_size, len, 0); 2226 2227done: 2228 if (++fl->cidx == fl->size) 2229 fl->cidx = 0; 2230 2231 return (ret); 2232} 2233 2234/** 2235 * handle_rsp_cntrl_info - handles control information in a response 2236 * @qs: the queue set corresponding to the response 2237 * @flags: the response control flags 2238 * 2239 * Handles the control information of an SGE response, such as GTS 2240 * indications and completion credits for the queue set's Tx queues. 2241 * HW coalesces credits, we don't do any extra SW coalescing. 2242 */ 2243static __inline void 2244handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 2245{ 2246 unsigned int credits; 2247 2248#if USE_GTS 2249 if (flags & F_RSPD_TXQ0_GTS) 2250 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 2251#endif 2252 credits = G_RSPD_TXQ0_CR(flags); 2253 if (credits) { 2254 qs->txq[TXQ_ETH].processed += credits; 2255 if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC) 2256 taskqueue_enqueue(qs->port->adapter->tq, 2257 &qs->port->timer_reclaim_task); 2258 } 2259 2260 credits = G_RSPD_TXQ2_CR(flags); 2261 if (credits) 2262 qs->txq[TXQ_CTRL].processed += credits; 2263 2264# if USE_GTS 2265 if (flags & F_RSPD_TXQ1_GTS) 2266 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 2267# endif 2268 credits = G_RSPD_TXQ1_CR(flags); 2269 if (credits) 2270 qs->txq[TXQ_OFLD].processed += credits; 2271} 2272 2273static void 2274check_ring_db(adapter_t *adap, struct sge_qset *qs, 2275 unsigned int sleeping) 2276{ 2277 ; 2278} 2279 2280/** 2281 * process_responses - process responses from an SGE response queue 2282 * @adap: the adapter 2283 * @qs: the queue set to which the response queue belongs 2284 * @budget: how many responses can be processed in this round 2285 * 2286 * Process responses from an SGE response queue up to the supplied budget. 
 * Responses include received packets as well as credits and other events
 * for the queues that belong to the response queue's queue set.
 * A negative budget is effectively unlimited.
 *
 * Additionally choose the interrupt holdoff time for the next interrupt
 * on this queue. If the system is under memory shortage use a fairly
 * long delay to help recovery.
 */
static int
process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
{
	struct sge_rspq *rspq = &qs->rspq;
	struct rsp_desc *r = &rspq->desc[rspq->cidx];
	int budget_left = budget;
	unsigned int sleeping = 0;
	int lro = qs->lro.enabled;
	struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
	int ngathered = 0;
#ifdef DEBUG
	static int last_holdoff = 0;
	if (rspq->holdoff_tmr != last_holdoff) {
		printf("next_holdoff=%d\n", rspq->holdoff_tmr);
		last_holdoff = rspq->holdoff_tmr;
	}
#endif
	/* Default holdoff for the next interrupt; raised below on mbuf shortage. */
	rspq->next_holdoff = rspq->holdoff_tmr;

	while (__predict_true(budget_left && is_new_response(r, rspq))) {
		int eth, eop = 0, ethpad = 0;
		uint32_t flags = ntohl(r->flags);
		/* First word of the descriptor doubles as the RSS/csum word. */
		uint32_t rss_csum = *(const uint32_t *)r;
		uint32_t rss_hash = r->rss_hdr.rss_hash_val;

		eth = (r->rss_hdr.opcode == CPL_RX_PKT);

		if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
			/* XXX */
			printf("async notification\n");

		} else if (flags & F_RSPD_IMM_DATA_VALID) {
			struct mbuf *m = NULL;
			if (cxgb_debug)
				printf("IMM DATA VALID\n");
			/*
			 * Need a chain-head mbuf (rspq->m) and, if one already
			 * exists, a second mbuf for the immediate data itself.
			 */
			if (rspq->m == NULL)
				rspq->m = m_gethdr(M_NOWAIT, MT_DATA);
			else
				m = m_gethdr(M_NOWAIT, MT_DATA);

			if (rspq->m == NULL || m == NULL) {
				/* Out of mbufs: back off interrupts to aid recovery. */
				rspq->next_holdoff = NOMEM_INTR_DELAY;
				budget_left--;
				break;
			}
			get_imm_packet(adap, r, rspq->m, m);
			eop = 1;
			rspq->imm_data++;
		} else if (r->len_cq) {
			int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;

			if (rspq->m == NULL)
				rspq->m = m_gethdr(M_NOWAIT, MT_DATA);
			if (rspq->m == NULL) {
				log(LOG_WARNING, "failed to get mbuf for packet\n");
				break;
			}

			ethpad = 2;
			/* eop != 0 once get_packet() has assembled a full frame. */
			eop = get_packet(adap, drop_thresh, qs, rspq->m, r);
		} else {
			DPRINTF("pure response\n");
			rspq->pure_rsps++;
		}

		if (flags & RSPD_CTRL_MASK) {
			sleeping |= flags & RSPD_GTS_MASK;
			handle_rsp_cntrl_info(qs, flags);
		}

		r++;
		if (__predict_false(++rspq->cidx == rspq->size)) {
			/* Wrapped: flip the generation bit used by is_new_response(). */
			rspq->cidx = 0;
			rspq->gen ^= 1;
			r = rspq->desc;
		}

		prefetch(r);
		/* Return consumed response-queue credits to HW in quarter-ring batches. */
		if (++rspq->credits >= (rspq->size / 4)) {
			refill_rspq(adap, rspq, rspq->credits);
			rspq->credits = 0;
		}

		if (eop) {
			prefetch(mtod(rspq->m, uint8_t *));
			prefetch(mtod(rspq->m, uint8_t *) + L1_CACHE_BYTES);

			if (eth) {
				/* Ethernet frame: hand to LRO/stack; mbuf ownership passes on. */
				t3_rx_eth_lro(adap, rspq, rspq->m, ethpad,
				    rss_hash, rss_csum, lro);

				rspq->m = NULL;
			} else {
				rspq->m->m_pkthdr.csum_data = rss_csum;
				/*
				 * XXX size mismatch
				 */
				m_set_priority(rspq->m, rss_hash);

				/* Offload traffic is batched for bundled delivery below. */
				ngathered = rx_offload(&adap->tdev, rspq, rspq->m,
				    offload_mbufs, ngathered);
			}
#ifdef notyet
			taskqueue_enqueue(adap->tq, &adap->timer_reclaim_task);
#else
			__refill_fl(adap, &qs->fl[0]);
			__refill_fl(adap, &qs->fl[1]);
#endif
		}
		--budget_left;
	}

	/* Flush any offload mbufs still gathered and any pending LRO state. */
	deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
	t3_lro_flush(adap, qs, &qs->lro);

	if (sleeping)
		check_ring_db(adap, qs, sleeping);

	smp_mb();  /* commit Tx queue processed updates */
	if (__predict_false(qs->txq_stopped != 0))
		restart_tx(qs);

	/* Return the number of responses actually consumed. */
	budget -= budget_left;
	return (budget);
}

/*
 * A helper function that processes responses and issues GTS.
 */
static __inline int
process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
{
	int work;
	static int last_holdoff = 0;

	/* Unlimited budget (-1): drain the queue completely. */
	work = process_responses(adap, rspq_to_qset(rq), -1);

	if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
		printf("next_holdoff=%d\n", rq->next_holdoff);
		last_holdoff = rq->next_holdoff;
	}

	/* GTS write: new timer value and new consumer index in one register. */
	t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
	    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
	return work;
}


/*
 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
 * Handles data events from SGE response queues as well as error and other
 * async events as they all use the same interrupt pin. We use one SGE
 * response queue per port in this mode and protect all response queues with
 * queue 0's lock.
 */
void
t3b_intr(void *data)
{
	uint32_t map;
	adapter_t *adap = data;
	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
	struct sge_rspq *q1 = &adap->sge.qs[1].rspq;

	t3_write_reg(adap, A_PL_CLI, 0);
	map = t3_read_reg(adap, A_SG_DATA_INTR);

	/* Not our interrupt (shared line). */
	if (!map)
		return;

	if (__predict_false(map & F_ERRINTR))
		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);

	/* Both queues are protected by queue 0's lock in INTx mode. */
	mtx_lock(&q0->lock);

	if (__predict_true(map & 1))
		process_responses_gts(adap, q0);

	if (map & 2)
		process_responses_gts(adap, q1);

	mtx_unlock(&q0->lock);
}

/*
 * The MSI interrupt handler. This needs to handle data events from SGE
 * response queues as well as error and other async events as they all use
 * the same MSI vector. We use one SGE response queue per port in this mode
 * and protect all response queues with queue 0's lock.
2483 */ 2484void 2485t3_intr_msi(void *data) 2486{ 2487 adapter_t *adap = data; 2488 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 2489 struct sge_rspq *q1 = &adap->sge.qs[1].rspq; 2490 int new_packets = 0; 2491 2492 mtx_lock(&q0->lock); 2493 if (process_responses_gts(adap, q0)) { 2494 new_packets = 1; 2495 } 2496 2497 if (adap->params.nports == 2 && 2498 process_responses_gts(adap, q1)) { 2499 new_packets = 1; 2500 } 2501 2502 mtx_unlock(&q0->lock); 2503 if (new_packets == 0) 2504 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 2505} 2506 2507void 2508t3_intr_msix(void *data) 2509{ 2510 struct sge_qset *qs = data; 2511 adapter_t *adap = qs->port->adapter; 2512 struct sge_rspq *rspq = &qs->rspq; 2513 2514 mtx_lock(&rspq->lock); 2515 if (process_responses_gts(adap, rspq) == 0) 2516 rspq->unhandled_irqs++; 2517 mtx_unlock(&rspq->lock); 2518} 2519 2520/* 2521 * broken by recent mbuf changes 2522 */ 2523static int 2524t3_lro_enable(SYSCTL_HANDLER_ARGS) 2525{ 2526 adapter_t *sc; 2527 int i, j, enabled, err, nqsets = 0; 2528 2529#ifndef LRO_WORKING 2530 return (0); 2531#endif 2532 2533 sc = arg1; 2534 enabled = sc->sge.qs[0].lro.enabled; 2535 err = sysctl_handle_int(oidp, &enabled, arg2, req); 2536 2537 if (err != 0) 2538 return (err); 2539 if (enabled == sc->sge.qs[0].lro.enabled) 2540 return (0); 2541 2542 for (i = 0; i < sc->params.nports; i++) 2543 for (j = 0; j < sc->port[i].nqsets; j++) 2544 nqsets++; 2545 2546 for (i = 0; i < nqsets; i++) 2547 sc->sge.qs[i].lro.enabled = enabled; 2548 2549 return (0); 2550} 2551 2552static int 2553t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS) 2554{ 2555 adapter_t *sc = arg1; 2556 struct qset_params *qsp = &sc->params.sge.qset[0]; 2557 int coalesce_nsecs; 2558 struct sge_qset *qs; 2559 int i, j, err, nqsets = 0; 2560 struct mtx *lock; 2561 2562 coalesce_nsecs = qsp->coalesce_nsecs; 2563 err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req); 2564 2565 if (err != 0) { 2566 return (err); 2567 } 2568 if (coalesce_nsecs == 
qsp->coalesce_nsecs) 2569 return (0); 2570 2571 for (i = 0; i < sc->params.nports; i++) 2572 for (j = 0; j < sc->port[i].nqsets; j++) 2573 nqsets++; 2574 2575 coalesce_nsecs = max(100, coalesce_nsecs); 2576 2577 for (i = 0; i < nqsets; i++) { 2578 qs = &sc->sge.qs[i]; 2579 qsp = &sc->params.sge.qset[i]; 2580 qsp->coalesce_nsecs = coalesce_nsecs; 2581 2582 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 2583 &sc->sge.qs[0].rspq.lock; 2584 2585 mtx_lock(lock); 2586 t3_update_qset_coalesce(qs, qsp); 2587 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | 2588 V_NEWTIMER(qs->rspq.holdoff_tmr)); 2589 mtx_unlock(lock); 2590 } 2591 2592 return (0); 2593} 2594 2595 2596void 2597t3_add_sysctls(adapter_t *sc) 2598{ 2599 struct sysctl_ctx_list *ctx; 2600 struct sysctl_oid_list *children; 2601 2602 ctx = device_get_sysctl_ctx(sc->dev); 2603 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 2604 2605 /* random information */ 2606 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 2607 "firmware_version", 2608 CTLFLAG_RD, &sc->fw_version, 2609 0, "firmware version"); 2610 2611 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 2612 "enable_lro", 2613 CTLTYPE_INT|CTLFLAG_RW, sc, 2614 0, t3_lro_enable, 2615 "I", "enable large receive offload"); 2616 2617 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 2618 "intr_coal", 2619 CTLTYPE_INT|CTLFLAG_RW, sc, 2620 0, t3_set_coalesce_nsecs, 2621 "I", "interrupt coalescing timer (ns)"); 2622 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 2623 "enable_debug", 2624 CTLFLAG_RW, &cxgb_debug, 2625 0, "enable verbose debugging output"); 2626 2627 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 2628 "collapse_free", 2629 CTLFLAG_RD, &collapse_free, 2630 0, "frees during collapse"); 2631 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 2632 "mb_free_vec_free", 2633 CTLFLAG_RD, &mb_free_vec_free, 2634 0, "frees during mb_free_vec"); 2635 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 2636 "collapse_mbufs", 2637 CTLFLAG_RW, &collapse_mbufs, 2638 0, "collapse mbuf chains into iovecs"); 
2639} 2640 2641/** 2642 * t3_get_desc - dump an SGE descriptor for debugging purposes 2643 * @qs: the queue set 2644 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx) 2645 * @idx: the descriptor index in the queue 2646 * @data: where to dump the descriptor contents 2647 * 2648 * Dumps the contents of a HW descriptor of an SGE queue. Returns the 2649 * size of the descriptor. 2650 */ 2651int 2652t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, 2653 unsigned char *data) 2654{ 2655 if (qnum >= 6) 2656 return (EINVAL); 2657 2658 if (qnum < 3) { 2659 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size) 2660 return -EINVAL; 2661 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc)); 2662 return sizeof(struct tx_desc); 2663 } 2664 2665 if (qnum == 3) { 2666 if (!qs->rspq.desc || idx >= qs->rspq.size) 2667 return (EINVAL); 2668 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc)); 2669 return sizeof(struct rsp_desc); 2670 } 2671 2672 qnum -= 4; 2673 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size) 2674 return (EINVAL); 2675 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc)); 2676 return sizeof(struct rx_desc); 2677}
|