/*-
 * Copyright (c) 2011 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/cxgbe/t4_sge.c 221464 2011-05-04 23:07:30Z np $");

#include "opt_inet.h"

#include <sys/types.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/sysctl.h>
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include "common/common.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "common/t4_msg.h"
#include "common/t4fw_interface.h"

struct fl_buf_info {
	int size;
	int type;
	uma_zone_t zone;
};

/* Filled up by t4_sge_modload */
static struct fl_buf_info fl_buf_info[FL_BUF_SIZES];

#define FL_BUF_SIZE(x)	(fl_buf_info[x].size)
#define FL_BUF_TYPE(x)	(fl_buf_info[x].type)
#define FL_BUF_ZONE(x)	(fl_buf_info[x].zone)

enum {
	FL_PKTSHIFT = 2
};

#define FL_ALIGN	min(CACHE_LINE_SIZE, 32)
#if CACHE_LINE_SIZE > 64
#define SPG_LEN		128
#else
#define SPG_LEN		64
#endif

/* Used to track coalesced tx work request */
struct txpkts {
	uint64_t *flitp;	/* ptr to flit where next pkt should start */
	uint8_t npkt;		/* # of packets in this work request */
	uint8_t nflits;		/* # of flits used by this work request */
	uint16_t plen;		/* total payload (sum of all packets) */
};

/* A packet's SGL.  This + m_pkthdr has all info needed for tx */
struct sgl {
	int nsegs;		/* # of segments in the SGL, 0 means imm. tx */
	int nflits;		/* # of flits needed for the SGL */
	bus_dma_segment_t seg[TX_SGL_SEGS];
};

static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int,
    int, iq_intr_handler_t *, char *);
static inline void init_fl(struct sge_fl *, int, char *);
static inline void init_eq(struct sge_eq *, int, char *);
static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *,
    bus_addr_t *, void **);
static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
    void *);
static int alloc_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *,
    int);
static int free_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *);
static int alloc_iq(struct sge_iq *, int);
static int free_iq(struct sge_iq *);
static int alloc_rxq(struct port_info *, struct sge_rxq *, int, int);
static int free_rxq(struct port_info *, struct sge_rxq *);
static int alloc_ctrlq(struct adapter *, struct sge_ctrlq *, int);
static int free_ctrlq(struct adapter *, struct sge_ctrlq *);
static int alloc_txq(struct port_info *, struct sge_txq *, int);
static int free_txq(struct port_info *, struct sge_txq *);
static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int);
static inline bool is_new_response(const struct sge_iq *, struct rsp_ctrl **);
static inline void iq_next(struct sge_iq *);
static inline void ring_fl_db(struct adapter *, struct sge_fl *);
static void refill_fl(struct adapter *, struct sge_fl *, int, int);
static int alloc_fl_sdesc(struct sge_fl *);
static void free_fl_sdesc(struct sge_fl *);
static int alloc_tx_maps(struct sge_txq *);
static void free_tx_maps(struct sge_txq *);
static void set_fl_tag_idx(struct sge_fl *, int);

static int get_pkt_sgl(struct sge_txq *, struct mbuf **, struct sgl *, int);
static int free_pkt_sgl(struct sge_txq *, struct sgl *);
static int write_txpkt_wr(struct port_info *, struct sge_txq *, struct mbuf *,
    struct sgl *);
static int add_to_txpkts(struct port_info *, struct sge_txq *, struct txpkts *,
    struct mbuf *, struct sgl *);
static void write_txpkts_wr(struct sge_txq *, struct txpkts *);
static inline void write_ulp_cpl_sgl(struct port_info *, struct sge_txq *,
    struct txpkts *, struct mbuf *, struct sgl *);
static int write_sgl_to_txd(struct sge_eq *, struct sgl *, caddr_t *);
static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int);
static inline void ring_eq_db(struct adapter *, struct sge_eq *);
static inline int reclaimable(struct sge_eq *);
static int reclaim_tx_descs(struct sge_txq *, int, int);
static void write_eqflush_wr(struct sge_eq *);
static __be64 get_flit(bus_dma_segment_t *, int, int);
static int handle_sge_egr_update(struct adapter *,
    const struct cpl_sge_egr_update *);

static int ctrl_tx(struct adapter *, struct sge_ctrlq *, struct mbuf *);

/*
 * Called on MOD_LOAD and fills up fl_buf_info[].
 */
void
t4_sge_modload(void)
{
	int i;
	int bufsize[FL_BUF_SIZES] = {
		MCLBYTES,
#if MJUMPAGESIZE != MCLBYTES
		MJUMPAGESIZE,
#endif
		MJUM9BYTES,
		MJUM16BYTES
	};

	for (i = 0; i < FL_BUF_SIZES; i++) {
		FL_BUF_SIZE(i) = bufsize[i];
		FL_BUF_TYPE(i) = m_gettype(bufsize[i]);
		FL_BUF_ZONE(i) = m_getzone(bufsize[i]);
	}
}

/**
 *	t4_sge_init - initialize SGE
 *	@sc: the adapter
 *
 *	Performs SGE initialization needed every time after a chip reset.
 *	We do not initialize any of the queues here; instead, the driver's
 *	top level must request them individually.
 */
void
t4_sge_init(struct adapter *sc)
{
	struct sge *s = &sc->sge;
	int i;

	t4_set_reg_field(sc, A_SGE_CONTROL, V_PKTSHIFT(M_PKTSHIFT) |
	    V_INGPADBOUNDARY(M_INGPADBOUNDARY) |
	    F_EGRSTATUSPAGESIZE,
	    V_INGPADBOUNDARY(ilog2(FL_ALIGN) - 5) |
	    V_PKTSHIFT(FL_PKTSHIFT) |
	    F_RXPKTCPLMODE |
	    V_EGRSTATUSPAGESIZE(SPG_LEN == 128));
	t4_set_reg_field(sc, A_SGE_HOST_PAGE_SIZE,
	    V_HOSTPAGESIZEPF0(M_HOSTPAGESIZEPF0),
	    V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10));

	for (i = 0; i < FL_BUF_SIZES; i++) {
		t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i),
		    FL_BUF_SIZE(i));
	}

	t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD,
	    V_THRESHOLD_0(s->counter_val[0]) |
	    V_THRESHOLD_1(s->counter_val[1]) |
	    V_THRESHOLD_2(s->counter_val[2]) |
	    V_THRESHOLD_3(s->counter_val[3]));

	t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1,
	    V_TIMERVALUE0(us_to_core_ticks(sc, s->timer_val[0])) |
	    V_TIMERVALUE1(us_to_core_ticks(sc, s->timer_val[1])));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3,
	    V_TIMERVALUE2(us_to_core_ticks(sc, s->timer_val[2])) |
	    V_TIMERVALUE3(us_to_core_ticks(sc, s->timer_val[3])));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5,
	    V_TIMERVALUE4(us_to_core_ticks(sc, s->timer_val[4])) |
	    V_TIMERVALUE5(us_to_core_ticks(sc, s->timer_val[5])));
}

int
t4_create_dma_tag(struct adapter *sc)
{
	int rc;

	rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE,
	    BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL,
	    NULL, &sc->dmat);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to create main DMA tag: %d\n", rc);
	}

	return (rc);
}

int
t4_destroy_dma_tag(struct adapter *sc)
{
	if (sc->dmat)
		bus_dma_tag_destroy(sc->dmat);

	return (0);
}

/*
 * Allocate and initialize the firmware event queue, control queues, and the
 * forwarded interrupt queues (if any).  The adapter owns all these queues as
 * they are not associated with any particular port.
 *
 * Returns errno on failure.  Resources allocated up to that point may still be
 * allocated.  Caller is responsible for cleanup in case this function fails.
 */
int
t4_setup_adapter_queues(struct adapter *sc)
{
	int i, rc;
	struct sge_iq *iq, *fwq;
	struct sge_ctrlq *ctrlq;
	iq_intr_handler_t *handler;
	char name[16];

	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

	if (sysctl_ctx_init(&sc->ctx) == 0) {
		struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
		struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);

		sc->oid_ctrlq = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO,
		    "ctrlq", CTLFLAG_RD, NULL, "ctrl queues");
	}

	fwq = &sc->sge.fwq;
	if (sc->flags & INTR_FWD) {
		iq = &sc->sge.fiq[0];

		/*
		 * Forwarded interrupt queues - allocate 1 if there's only 1
		 * vector available, one less than the number of vectors
		 * otherwise (the first vector is reserved for the error
		 * interrupt in that case).
		 */
		i = sc->intr_count > 1 ? 1 : 0;
		for (; i < sc->intr_count; i++, iq++) {

			snprintf(name, sizeof(name), "%s fiq%d",
			    device_get_nameunit(sc->dev), i);
			init_iq(iq, sc, 0, 0, (sc->sge.nrxq + 1) * 2, 16, NULL,
			    name);

			rc = alloc_iq(iq, i);
			if (rc != 0) {
				device_printf(sc->dev,
				    "failed to create fwd intr queue %d: %d\n",
				    i, rc);
				return (rc);
			}
		}

		handler = t4_evt_rx;
		i = 0;	/* forward fwq's interrupt to the first fiq */
	} else {
		handler = NULL;
		i = 1;	/* fwq should use vector 1 (0 is used by error) */
	}

	snprintf(name, sizeof(name), "%s fwq", device_get_nameunit(sc->dev));
	init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE, handler, name);
	rc = alloc_iq(fwq, i);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to create firmware event queue: %d\n", rc);

		return (rc);
	}

	/*
	 * Control queues - one per hardware channel.
	 */
	ctrlq = &sc->sge.ctrlq[0];
	for (i = 0; i < NCHAN; i++, ctrlq++) {
		snprintf(name, sizeof(name), "%s ctrlq%d",
		    device_get_nameunit(sc->dev), i);
		init_eq(&ctrlq->eq, CTRL_EQ_QSIZE, name);

		rc = alloc_ctrlq(sc, ctrlq, i);
		if (rc != 0) {
			device_printf(sc->dev,
			    "failed to create control queue %d: %d\n", i, rc);
			return (rc);
		}
	}

	return (rc);
}

/*
 * Idempotent
 */
int
t4_teardown_adapter_queues(struct adapter *sc)
{
	int i;
	struct sge_iq *iq;

	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

	/* Do this before freeing the queues */
	if (sc->oid_ctrlq) {
		sysctl_ctx_free(&sc->ctx);
		sc->oid_ctrlq = NULL;
	}

	for (i = 0; i < NCHAN; i++)
		free_ctrlq(sc, &sc->sge.ctrlq[i]);

	iq = &sc->sge.fwq;
	free_iq(iq);
	if (sc->flags & INTR_FWD) {
		for (i = 0; i < NFIQ(sc); i++) {
			iq = &sc->sge.fiq[i];
			free_iq(iq);
		}
	}

	return (0);
}

int
t4_setup_eth_queues(struct port_info *pi)
{
	int rc = 0, i, intr_idx;
	struct sge_rxq *rxq;
	struct sge_txq *txq;
	char name[16];
	struct adapter *sc = pi->adapter;

	if (sysctl_ctx_init(&pi->ctx) == 0) {
		struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev);
		struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);

		pi->oid_rxq = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO,
		    "rxq", CTLFLAG_RD, NULL, "rx queues");
		pi->oid_txq = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO,
		    "txq", CTLFLAG_RD, NULL, "tx queues");
	}

	for_each_rxq(pi, i, rxq) {

		snprintf(name, sizeof(name), "%s rxq%d-iq",
		    device_get_nameunit(pi->dev), i);
		init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx,
		    pi->qsize_rxq, RX_IQ_ESIZE,
		    sc->flags & INTR_FWD ? t4_eth_rx : NULL, name);
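		/*
		 * Note: when INTR_FWD is set this iq is given a handler
		 * (t4_eth_rx), which per the comment above init_iq means it
		 * takes no direct interrupt and is serviced via a forwarded
		 * interrupt queue.  Otherwise the handler is left NULL and
		 * the queue is driven by its own vector (t4_intr_data, which
		 * simply wraps t4_eth_rx).
		 */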

		snprintf(name, sizeof(name), "%s rxq%d-fl",
		    device_get_nameunit(pi->dev), i);
		init_fl(&rxq->fl, pi->qsize_rxq / 8, name);

		if (sc->flags & INTR_FWD)
			intr_idx = (pi->first_rxq + i) % NFIQ(sc);
		else
			intr_idx = pi->first_rxq + i + 2;

		rc = alloc_rxq(pi, rxq, intr_idx, i);
		if (rc != 0)
			goto done;

		intr_idx++;
	}

	for_each_txq(pi, i, txq) {

		snprintf(name, sizeof(name), "%s txq%d",
		    device_get_nameunit(pi->dev), i);
		init_eq(&txq->eq, pi->qsize_txq, name);

		rc = alloc_txq(pi, txq, i);
		if (rc != 0)
			goto done;
	}

done:
	if (rc)
		t4_teardown_eth_queues(pi);

	return (rc);
}

/*
 * Idempotent
 */
int
t4_teardown_eth_queues(struct port_info *pi)
{
	int i;
	struct sge_rxq *rxq;
	struct sge_txq *txq;

	/* Do this before freeing the queues */
	if (pi->oid_txq || pi->oid_rxq) {
		sysctl_ctx_free(&pi->ctx);
		pi->oid_txq = pi->oid_rxq = NULL;
	}

	for_each_txq(pi, i, txq) {
		free_txq(pi, txq);
	}

	for_each_rxq(pi, i, rxq) {
		free_rxq(pi, rxq);
	}

	return (0);
}

/* Deals with errors and forwarded interrupts */
void
t4_intr_all(void *arg)
{
	struct adapter *sc = arg;

	t4_intr_err(arg);
	t4_intr_fwd(&sc->sge.fiq[0]);
}

/* Deals with forwarded interrupts on the given ingress queue */
void
t4_intr_fwd(void *arg)
{
	struct sge_iq *iq = arg, *q;
	struct adapter *sc = iq->adapter;
	struct rsp_ctrl *ctrl;
	int ndesc_pending = 0, ndesc_total = 0;
	int qid;

	if (!atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_BUSY))
		return;

	while (is_new_response(iq, &ctrl)) {

		rmb();

		/* Only interrupt muxing expected on this queue */
		KASSERT(G_RSPD_TYPE(ctrl->u.type_gen) == X_RSPD_TYPE_INTR,
		    ("unexpected event on forwarded interrupt queue: %x",
		    G_RSPD_TYPE(ctrl->u.type_gen)));

		qid = ntohl(ctrl->pldbuflen_qid) - sc->sge.iq_start;
		q = sc->sge.iqmap[qid];

		q->handler(q);

		ndesc_total++;
		if (++ndesc_pending >= iq->qsize / 4) {
			t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
			    V_CIDXINC(ndesc_pending) |
			    V_INGRESSQID(iq->cntxt_id) |
			    V_SEINTARM(
				V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
			ndesc_pending = 0;
		}

		iq_next(iq);
	}

	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndesc_pending) |
	    V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));

	atomic_cmpset_32(&iq->state, IQS_BUSY, IQS_IDLE);
}

/* Deals with error interrupts */
void
t4_intr_err(void *arg)
{
	struct adapter *sc = arg;

	if (sc->intr_type == INTR_INTX)
		t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0);

	t4_slow_intr_handler(sc);
}

/* Deals with the firmware event queue */
void
t4_intr_evt(void *arg)
{
	struct sge_iq *iq = arg;

	if (!atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_BUSY))
		return;

	t4_evt_rx(arg);

	atomic_cmpset_32(&iq->state, IQS_BUSY, IQS_IDLE);
}

void
t4_intr_data(void *arg)
{
	struct sge_iq *iq = arg;

	if (!atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_BUSY))
		return;

	t4_eth_rx(arg);

	atomic_cmpset_32(&iq->state, IQS_BUSY, IQS_IDLE);
}

void
t4_evt_rx(void *arg)
{
	struct sge_iq *iq = arg;
	struct adapter *sc = iq->adapter;
	struct rsp_ctrl *ctrl;
	const struct rss_header *rss;
	int ndesc_pending = 0, ndesc_total = 0;

	KASSERT(iq == &sc->sge.fwq, ("%s: unexpected ingress queue", __func__));

	while (is_new_response(iq, &ctrl)) {

		rmb();

		rss = (const void *)iq->cdesc;

		/* Should only get CPL on this queue */
		KASSERT(G_RSPD_TYPE(ctrl->u.type_gen) == X_RSPD_TYPE_CPL,
		    ("%s: unexpected type %d", __func__,
		    G_RSPD_TYPE(ctrl->u.type_gen)));

		switch (rss->opcode) {
		case CPL_FW4_MSG:
		case CPL_FW6_MSG: {
			const struct cpl_fw6_msg *cpl;

			cpl = (const void *)(rss + 1);
			if (cpl->type == FW6_TYPE_CMD_RPL)
				t4_handle_fw_rpl(sc, cpl->data);

			break;
		}
		case CPL_SGE_EGR_UPDATE:
			handle_sge_egr_update(sc, (const void *)(rss + 1));
			break;

		default:
			device_printf(sc->dev,
			    "can't handle CPL opcode %d.\n", rss->opcode);
		}

		ndesc_total++;
		if (++ndesc_pending >= iq->qsize / 4) {
			t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
			    V_CIDXINC(ndesc_pending) |
			    V_INGRESSQID(iq->cntxt_id) |
			    V_SEINTARM(
				V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
			ndesc_pending = 0;
		}
		iq_next(iq);
	}

	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndesc_pending) |
	    V_INGRESSQID(iq->cntxt_id) | V_SEINTARM(iq->intr_params));
}

void
t4_eth_rx(void *arg)
{
	struct sge_rxq *rxq = arg;
	struct sge_iq *iq = arg;
	struct adapter *sc = iq->adapter;
	struct rsp_ctrl *ctrl;
	struct ifnet *ifp = rxq->ifp;
	struct sge_fl *fl = &rxq->fl;
	struct fl_sdesc *sd = &fl->sdesc[fl->cidx], *sd_next;
	const struct rss_header *rss;
	const struct cpl_rx_pkt *cpl;
	uint32_t len;
	int ndescs = 0, i;
	struct mbuf *m0, *m;
#ifdef INET
	struct lro_ctrl *lro = &rxq->lro;
	struct lro_entry *l;
#endif

	prefetch(sd->m);
	prefetch(sd->cl);

	iq->intr_next = iq->intr_params;
	while (is_new_response(iq, &ctrl)) {

		rmb();

		rss = (const void *)iq->cdesc;
		i = G_RSPD_TYPE(ctrl->u.type_gen);

		if (__predict_false(i == X_RSPD_TYPE_CPL)) {

			/* Can't be anything except an egress update */
			KASSERT(rss->opcode == CPL_SGE_EGR_UPDATE,
			    ("%s: unexpected CPL %x", __func__, rss->opcode));

			handle_sge_egr_update(sc, (const void *)(rss + 1));
			goto nextdesc;
		}
		KASSERT(i == X_RSPD_TYPE_FLBUF && rss->opcode == CPL_RX_PKT,
		    ("%s: unexpected CPL %x rsp %d", __func__, rss->opcode, i));

		sd_next = sd + 1;
		if (__predict_false(fl->cidx + 1 == fl->cap))
			sd_next = fl->sdesc;
		prefetch(sd_next->m);
		prefetch(sd_next->cl);

		cpl = (const void *)(rss + 1);

		m0 = sd->m;
		sd->m = NULL;	/* consumed */

		len = be32toh(ctrl->pldbuflen_qid);
		if (__predict_false((len & F_RSPD_NEWBUF) == 0))
			panic("%s: cannot handle packed frames", __func__);
		len = G_RSPD_LEN(len);

		bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
		    BUS_DMASYNC_POSTREAD);

		m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR);
		if (len < MINCLSIZE) {
			/* copy data to mbuf, buffer will be recycled */
			bcopy(sd->cl, mtod(m0, caddr_t), len);
			m0->m_len = len;
		} else {
			bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
			m_cljset(m0, sd->cl, FL_BUF_TYPE(sd->tag_idx));
			sd->cl = NULL;	/* consumed */
			m0->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
		}

		len -= FL_PKTSHIFT;
		m0->m_len -= FL_PKTSHIFT;
		m0->m_data += FL_PKTSHIFT;

		m0->m_pkthdr.len = len;
		m0->m_pkthdr.rcvif = ifp;
		m0->m_flags |= M_FLOWID;
		m0->m_pkthdr.flowid = rss->hash_val;
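
		/*
		 * Fill in the rx checksum and VLAN metadata advertised by the
		 * hardware in the CPL, and count the assists.
		 */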
		if (cpl->csum_calc && !cpl->err_vec &&
		    ifp->if_capenable & IFCAP_RXCSUM) {
			m0->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED |
			    CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
			if (cpl->ip_frag)
				m0->m_pkthdr.csum_data = be16toh(cpl->csum);
			else
				m0->m_pkthdr.csum_data = 0xffff;
			rxq->rxcsum++;
		}

		if (cpl->vlan_ex) {
			m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan);
			m0->m_flags |= M_VLANTAG;
			rxq->vlan_extraction++;
		}

		i = 1;	/* # of fl sdesc used */
		sd = sd_next;
		if (__predict_false(++fl->cidx == fl->cap))
			fl->cidx = 0;

		len -= m0->m_len;
		m = m0;
		while (len) {
			i++;

			sd_next = sd + 1;
			if (__predict_false(fl->cidx + 1 == fl->cap))
				sd_next = fl->sdesc;
			prefetch(sd_next->m);
			prefetch(sd_next->cl);

			m->m_next = sd->m;
			sd->m = NULL;	/* consumed */
			m = m->m_next;

			bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
			    BUS_DMASYNC_POSTREAD);

			m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0);
			if (len <= MLEN) {
				bcopy(sd->cl, mtod(m, caddr_t), len);
				m->m_len = len;
			} else {
				bus_dmamap_unload(fl->tag[sd->tag_idx],
				    sd->map);
				m_cljset(m, sd->cl, FL_BUF_TYPE(sd->tag_idx));
				sd->cl = NULL;	/* consumed */
				m->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
			}

			i++;
			sd = sd_next;
			if (__predict_false(++fl->cidx == fl->cap))
				fl->cidx = 0;

			len -= m->m_len;
		}

#ifdef INET
		if (cpl->l2info & htobe32(F_RXF_LRO) &&
		    rxq->flags & RXQ_LRO_ENABLED &&
		    tcp_lro_rx(lro, m0, 0) == 0) {
			/* queued for LRO */
		} else
#endif
		ifp->if_input(ifp, m0);

		FL_LOCK(fl);
		fl->needed += i;
		if (fl->needed >= 32)
			refill_fl(sc, fl, 64, 32);
		FL_UNLOCK(fl);

nextdesc:	ndescs++;
		iq_next(iq);

		if (ndescs > 32) {
			t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
			    V_CIDXINC(ndescs) |
			    V_INGRESSQID((u32)iq->cntxt_id) |
			    V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
			ndescs = 0;
		}
	}

#ifdef INET
	while (!SLIST_EMPTY(&lro->lro_active)) {
		l = SLIST_FIRST(&lro->lro_active);
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, l);
	}
#endif

	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) |
	    V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_next));

	FL_LOCK(fl);
	if (fl->needed >= 32)
		refill_fl(sc, fl, 128, 8);
	FL_UNLOCK(fl);
}

int
t4_mgmt_tx(struct adapter *sc, struct mbuf *m)
{
	return ctrl_tx(sc, &sc->sge.ctrlq[0], m);
}

/* Per-packet header in a coalesced tx WR, before the SGL starts (in flits) */
#define TXPKTS_PKT_HDR ((\
    sizeof(struct ulp_txpkt) + \
    sizeof(struct ulptx_idata) + \
    sizeof(struct cpl_tx_pkt_core) \
    ) / 8)

/* Header of a coalesced tx WR, before SGL of first packet (in flits) */
#define TXPKTS_WR_HDR (\
    sizeof(struct fw_eth_tx_pkts_wr) / 8 + \
    TXPKTS_PKT_HDR)

/* Header of a tx WR, before SGL of first packet (in flits) */
#define TXPKT_WR_HDR ((\
    sizeof(struct fw_eth_tx_pkt_wr) + \
    sizeof(struct cpl_tx_pkt_core) \
    ) / 8)

/* Header of a tx LSO WR, before SGL of first packet (in flits) */
#define TXPKT_LSO_WR_HDR ((\
    sizeof(struct fw_eth_tx_pkt_wr) + \
    sizeof(struct cpl_tx_pkt_lso) + \
    sizeof(struct cpl_tx_pkt_core) \
    ) / 8)
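
/*
 * Main tx routine for a txq.  Drains the given frame and whatever else is
 * queued on the txq's buf_ring: frames are coalesced into txpkts work
 * requests while more traffic is waiting, and written out as individual
 * txpkt work requests otherwise.  The doorbell is rung progressively and
 * completed descriptors are reclaimed along the way.  If we run out of
 * hardware descriptors or DMA maps the current frame is parked in txq->m
 * and tx is suspended until a credit flush/reclaim gets things going again.
 */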
int
t4_eth_tx(struct ifnet *ifp, struct sge_txq *txq, struct mbuf *m)
{
	struct port_info *pi = (void *)ifp->if_softc;
	struct adapter *sc = pi->adapter;
	struct sge_eq *eq = &txq->eq;
	struct buf_ring *br = txq->br;
	struct mbuf *next;
	int rc, coalescing, can_reclaim;
	struct txpkts txpkts;
	struct sgl sgl;

	TXQ_LOCK_ASSERT_OWNED(txq);
	KASSERT(m, ("%s: called with nothing to do.", __func__));

	prefetch(&eq->desc[eq->pidx]);
	prefetch(&txq->sdesc[eq->pidx]);

	txpkts.npkt = 0;	/* indicates there's nothing in txpkts */
	coalescing = 0;

	if (eq->avail < 8)
		reclaim_tx_descs(txq, 0, 8);

	for (; m; m = next ? next : drbr_dequeue(ifp, br)) {

		if (eq->avail < 8)
			break;

		next = m->m_nextpkt;
		m->m_nextpkt = NULL;

		if (next || buf_ring_peek(br))
			coalescing = 1;

		rc = get_pkt_sgl(txq, &m, &sgl, coalescing);
		if (rc != 0) {
			if (rc == ENOMEM) {

				/* Short of resources, suspend tx */

				m->m_nextpkt = next;
				break;
			}

			/*
			 * Unrecoverable error for this packet, throw it away
			 * and move on to the next.  get_pkt_sgl may already
			 * have freed m (it will be NULL in that case and the
			 * m_freem here is still safe).
			 */

			m_freem(m);
			continue;
		}

		if (coalescing &&
		    add_to_txpkts(pi, txq, &txpkts, m, &sgl) == 0) {

			/* Successfully absorbed into txpkts */

			write_ulp_cpl_sgl(pi, txq, &txpkts, m, &sgl);
			goto doorbell;
		}

		/*
		 * We weren't coalescing to begin with, or current frame could
		 * not be coalesced (add_to_txpkts flushes txpkts if a frame
		 * given to it can't be coalesced).  Either way there should be
		 * nothing in txpkts.
		 */
		KASSERT(txpkts.npkt == 0,
		    ("%s: txpkts not empty: %d", __func__, txpkts.npkt));

		/* We're sending out individual packets now */
		coalescing = 0;

		if (eq->avail < 8)
			reclaim_tx_descs(txq, 0, 8);
		rc = write_txpkt_wr(pi, txq, m, &sgl);
		if (rc != 0) {

			/* Short of hardware descriptors, suspend tx */

			/*
			 * This is an unlikely but expensive failure.  We've
			 * done all the hard work (DMA mappings etc.) and now we
			 * can't send out the packet.  What's worse, we have to
			 * spend even more time freeing up everything in sgl.
			 */
			txq->no_desc++;
			free_pkt_sgl(txq, &sgl);

			m->m_nextpkt = next;
			break;
		}

		ETHER_BPF_MTAP(ifp, m);
		if (sgl.nsegs == 0)
			m_freem(m);

doorbell:
		/* Fewer and fewer doorbells as the queue fills up */
		if (eq->pending >= (1 << (fls(eq->qsize - eq->avail) / 2)))
			ring_eq_db(sc, eq);

		can_reclaim = reclaimable(eq);
		if (can_reclaim >= 32)
			reclaim_tx_descs(txq, can_reclaim, 32);
	}

	if (txpkts.npkt > 0)
		write_txpkts_wr(txq, &txpkts);

	/*
	 * m not NULL means there was an error but we haven't thrown it away.
	 * This can happen when we're short of tx descriptors (no_desc) or maybe
	 * even DMA maps (no_dmamap).  Either way, a credit flush and reclaim
	 * will get things going again.
	 *
	 * If eq->avail is already 0 we know a credit flush was requested in the
	 * WR that reduced it to 0 so we don't need another flush (we don't have
	 * any descriptor for a flush WR anyway, duh).
	 */
	if (m && eq->avail > 0 && !(eq->flags & EQ_CRFLUSHED)) {
		struct tx_sdesc *txsd = &txq->sdesc[eq->pidx];

		txsd->desc_used = 1;
		txsd->credits = 0;
		write_eqflush_wr(eq);
	}
	txq->m = m;

	if (eq->pending)
		ring_eq_db(sc, eq);

	can_reclaim = reclaimable(eq);
	if (can_reclaim >= 32)
		reclaim_tx_descs(txq, can_reclaim, 128);

	return (0);
}

void
t4_update_fl_bufsize(struct ifnet *ifp)
{
	struct port_info *pi = ifp->if_softc;
	struct sge_rxq *rxq;
	struct sge_fl *fl;
	int i;

	for_each_rxq(pi, i, rxq) {
		fl = &rxq->fl;

		FL_LOCK(fl);
		set_fl_tag_idx(fl, ifp->if_mtu);
		FL_UNLOCK(fl);
	}
}

/*
 * A non-NULL handler indicates this iq will not receive direct interrupts, the
 * handler will be invoked by a forwarded interrupt queue.
 */
static inline void
init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx,
    int qsize, int esize, iq_intr_handler_t *handler, char *name)
{
	KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS,
	    ("%s: bad tmr_idx %d", __func__, tmr_idx));
	KASSERT(pktc_idx < SGE_NCOUNTERS,	/* -ve is ok, means don't use */
	    ("%s: bad pktc_idx %d", __func__, pktc_idx));

	iq->flags = 0;
	iq->adapter = sc;
	iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx) |
	    V_QINTR_CNT_EN(pktc_idx >= 0);
	iq->intr_pktc_idx = pktc_idx;
	iq->qsize = roundup(qsize, 16);		/* See FW_IQ_CMD/iqsize */
	iq->esize = max(esize, 16);		/* See FW_IQ_CMD/iqesize */
	iq->handler = handler;
	strlcpy(iq->lockname, name, sizeof(iq->lockname));
}

static inline void
init_fl(struct sge_fl *fl, int qsize, char *name)
{
	fl->qsize = qsize;
	strlcpy(fl->lockname, name, sizeof(fl->lockname));
}

static inline void
init_eq(struct sge_eq *eq, int qsize, char *name)
{
	eq->qsize = qsize;
	strlcpy(eq->lockname, name, sizeof(eq->lockname));
}

static int
alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag,
    bus_dmamap_t *map, bus_addr_t *pa, void **va)
{
	int rc;

	rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR,
	    BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag);
	if (rc != 0) {
		device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc);
		goto done;
	}

	rc = bus_dmamem_alloc(*tag, va,
	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map);
	if (rc != 0) {
		device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc);
		goto done;
	}

	rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0);
	if (rc != 0) {
		device_printf(sc->dev, "cannot load DMA map: %d\n", rc);
		goto done;
	}
done:
	if (rc)
		free_ring(sc, *tag, *map, *pa, *va);

	return (rc);
}

static int
free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map,
    bus_addr_t pa, void *va)
{
	if (pa)
		bus_dmamap_unload(tag, map);
	if (va)
		bus_dmamem_free(tag, va, map);
	if (tag)
		bus_dma_tag_destroy(tag);

	return (0);
}

/*
 * Allocates the ring for an ingress queue and an optional freelist.  If the
 * freelist is specified it will be allocated and then associated with the
 * ingress queue.
 *
 * Returns errno on failure.  Resources allocated up to that point may still be
 * allocated.  Caller is responsible for cleanup in case this function fails.
 *
 * If the ingress queue will take interrupts directly (iq->handler == NULL) then
 * the intr_idx specifies the vector, starting from 0.  Otherwise it specifies
 * the index of the queue to which its interrupts will be forwarded.
 */
static int
alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
    int intr_idx)
{
	int rc, i, cntxt_id;
	size_t len;
	struct fw_iq_cmd c;
	struct adapter *sc = iq->adapter;
	__be32 v = 0;

	/* The adapter queues are nominally allocated in port[0]'s name */
	if (pi == NULL)
		pi = sc->port[0];

	len = iq->qsize * iq->esize;
	rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba,
	    (void **)&iq->desc);
	if (rc != 0)
		return (rc);

	bzero(&c, sizeof(c));
	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) |
	    V_FW_IQ_CMD_VFN(0));

	c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART |
	    FW_LEN16(c));

	/* Special handling for firmware event queue */
	if (iq == &sc->sge.fwq)
		v |= F_FW_IQ_CMD_IQASYNCH;

	if (iq->handler) {
		KASSERT(intr_idx < NFIQ(sc),
		    ("%s: invalid indirect intr_idx %d", __func__, intr_idx));
		v |= F_FW_IQ_CMD_IQANDST;
		v |= V_FW_IQ_CMD_IQANDSTINDEX(sc->sge.fiq[intr_idx].abs_id);
	} else {
		KASSERT(intr_idx < sc->intr_count,
		    ("%s: invalid direct intr_idx %d", __func__, intr_idx));
		v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx);
	}

	c.type_to_iqandstindex = htobe32(v |
	    V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
	    V_FW_IQ_CMD_VIID(pi->viid) |
	    V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT));
	c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
	    F_FW_IQ_CMD_IQGTSMODE |
	    V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) |
	    V_FW_IQ_CMD_IQESIZE(ilog2(iq->esize) - 4));
	c.iqsize = htobe16(iq->qsize);
	c.iqaddr = htobe64(iq->ba);

	if (fl) {
		mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF);

		for (i = 0; i < FL_BUF_SIZES; i++) {

			/*
			 * A freelist buffer must be 16 byte aligned as the SGE
			 * uses the low 4 bits of the bus addr to figure out the
			 * buffer size.
			 */
			rc = bus_dma_tag_create(sc->dmat, 16, 0,
			    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
			    FL_BUF_SIZE(i), 1, FL_BUF_SIZE(i), BUS_DMA_ALLOCNOW,
			    NULL, NULL, &fl->tag[i]);
			if (rc != 0) {
				device_printf(sc->dev,
				    "failed to create fl DMA tag[%d]: %d\n",
				    i, rc);
				return (rc);
			}
		}
		len = fl->qsize * RX_FL_ESIZE;
		rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map,
		    &fl->ba, (void **)&fl->desc);
		if (rc)
			return (rc);

		/* Allocate space for one software descriptor per buffer. */
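		/*
		 * The sizing below appears to reflect the hardware layout:
		 * fl->qsize is in RX_FL_ESIZE-byte units, the status page at
		 * the end of the ring takes SPG_LEN / RX_FL_ESIZE of them, and
		 * each remaining unit holds 8 of the __be64 buffer addresses
		 * that refill_fl writes (hence the multiply by 8 here and the
		 * divide-by-8 in ring_fl_db/FL_HW_IDX).
		 */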
		fl->cap = (fl->qsize - SPG_LEN / RX_FL_ESIZE) * 8;
		FL_LOCK(fl);
		set_fl_tag_idx(fl, pi->ifp->if_mtu);
		rc = alloc_fl_sdesc(fl);
		FL_UNLOCK(fl);
		if (rc != 0) {
			device_printf(sc->dev,
			    "failed to setup fl software descriptors: %d\n",
			    rc);
			return (rc);
		}
		fl->needed = fl->cap;

		c.iqns_to_fl0congen =
		    htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE));
		c.fl0dcaen_to_fl0cidxfthresh =
		    htobe16(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_64B) |
		    V_FW_IQ_CMD_FL0FBMAX(X_FETCHBURSTMAX_512B));
		c.fl0size = htobe16(fl->qsize);
		c.fl0addr = htobe64(fl->ba);
	}

	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to create ingress queue: %d\n", rc);
		return (rc);
	}

	iq->cdesc = iq->desc;
	iq->cidx = 0;
	iq->gen = 1;
	iq->intr_next = iq->intr_params;
	iq->cntxt_id = be16toh(c.iqid);
	iq->abs_id = be16toh(c.physiqid);
	iq->flags |= (IQ_ALLOCATED | IQ_STARTED);

	cntxt_id = iq->cntxt_id - sc->sge.iq_start;
	KASSERT(cntxt_id < sc->sge.niq,
	    ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__,
	    cntxt_id, sc->sge.niq - 1));
	sc->sge.iqmap[cntxt_id] = iq;

	if (fl) {
		fl->cntxt_id = be16toh(c.fl0id);
		fl->pidx = fl->cidx = 0;

		cntxt_id = fl->cntxt_id - sc->sge.eq_start;
		KASSERT(cntxt_id < sc->sge.neq,
		    ("%s: fl->cntxt_id (%d) more than the max (%d)", __func__,
		    cntxt_id, sc->sge.neq - 1));
		sc->sge.eqmap[cntxt_id] = (void *)fl;

		FL_LOCK(fl);
		refill_fl(sc, fl, -1, 8);
		FL_UNLOCK(fl);
	}

	/* Enable IQ interrupts */
	atomic_store_rel_32(&iq->state, IQS_IDLE);
	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_SEINTARM(iq->intr_params) |
	    V_INGRESSQID(iq->cntxt_id));

	return (0);
}

/*
 * This can be called with the iq/fl in any state - fully allocated and
 * functional, partially allocated, even all-zeroed out.
 */
static int
free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl)
{
	int i, rc;
	struct adapter *sc = iq->adapter;
	device_t dev;

	if (sc == NULL)
		return (0);	/* nothing to do */

	dev = pi ? pi->dev : sc->dev;

	if (iq->flags & IQ_STARTED) {
		rc = -t4_iq_start_stop(sc, sc->mbox, 0, sc->pf, 0,
		    iq->cntxt_id, fl ? fl->cntxt_id : 0xffff, 0xffff);
		if (rc != 0) {
			device_printf(dev,
			    "failed to stop queue %p: %d\n", iq, rc);
			return (rc);
		}
		iq->flags &= ~IQ_STARTED;

		/* Synchronize with the interrupt handler */
		while (!atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_DISABLED))
			pause("iqfree", hz / 1000);
	}

	if (iq->flags & IQ_ALLOCATED) {

		rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0,
		    FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id,
		    fl ? fl->cntxt_id : 0xffff, 0xffff);
		if (rc != 0) {
			device_printf(dev,
			    "failed to free queue %p: %d\n", iq, rc);
			return (rc);
		}
		iq->flags &= ~IQ_ALLOCATED;
	}

	free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc);

	bzero(iq, sizeof(*iq));

	if (fl) {
		free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba,
		    fl->desc);

		if (fl->sdesc) {
			FL_LOCK(fl);
			free_fl_sdesc(fl);
			FL_UNLOCK(fl);
		}

		if (mtx_initialized(&fl->fl_lock))
			mtx_destroy(&fl->fl_lock);

		for (i = 0; i < FL_BUF_SIZES; i++) {
			if (fl->tag[i])
				bus_dma_tag_destroy(fl->tag[i]);
		}

		bzero(fl, sizeof(*fl));
	}

	return (0);
}

static int
alloc_iq(struct sge_iq *iq, int intr_idx)
{
	return alloc_iq_fl(NULL, iq, NULL, intr_idx);
}

static int
free_iq(struct sge_iq *iq)
{
	return free_iq_fl(NULL, iq, NULL);
}

static int
alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx)
{
	int rc;
	struct sysctl_oid *oid;
	struct sysctl_oid_list *children;
	char name[16];

	rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx);
	if (rc != 0)
		return (rc);

#ifdef INET
	rc = tcp_lro_init(&rxq->lro);
	if (rc != 0)
		return (rc);
	rxq->lro.ifp = pi->ifp;	/* also indicates LRO init'ed */

	if (pi->ifp->if_capenable & IFCAP_LRO)
		rxq->flags |= RXQ_LRO_ENABLED;
#endif
	rxq->ifp = pi->ifp;

	children = SYSCTL_CHILDREN(pi->oid_rxq);

	snprintf(name, sizeof(name), "%d", idx);
	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
	    NULL, "rx queue");
	children = SYSCTL_CHILDREN(oid);

#ifdef INET
	SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD,
	    &rxq->lro.lro_queued, 0, NULL);
	SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD,
	    &rxq->lro.lro_flushed, 0, NULL);
#endif
	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD,
	    &rxq->rxcsum, "# of times hardware assisted with checksum");
	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_extraction",
	    CTLFLAG_RD, &rxq->vlan_extraction,
	    "# of times hardware extracted 802.1Q tag");

	return (rc);
}

static int
free_rxq(struct port_info *pi, struct sge_rxq *rxq)
{
	int rc;

#ifdef INET
	if (rxq->lro.ifp) {
		tcp_lro_free(&rxq->lro);
		rxq->lro.ifp = NULL;
	}
#endif

	rc = free_iq_fl(pi, &rxq->iq, &rxq->fl);
	if (rc == 0)
		bzero(rxq, sizeof(*rxq));

	return (rc);
}

static int
alloc_ctrlq(struct adapter *sc, struct sge_ctrlq *ctrlq, int idx)
{
	int rc, cntxt_id;
	size_t len;
	struct fw_eq_ctrl_cmd c;
	struct sge_eq *eq = &ctrlq->eq;
	char name[16];
	struct sysctl_oid *oid;
	struct sysctl_oid_list *children;

	mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF);

	len = eq->qsize * CTRL_EQ_ESIZE;
	rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map,
	    &eq->ba, (void **)&eq->desc);
	if (rc)
		return (rc);

	eq->cap = eq->qsize - SPG_LEN / CTRL_EQ_ESIZE;
	eq->spg = (void *)&eq->desc[eq->cap];
	eq->avail = eq->cap - 1;	/* one less to avoid cidx = pidx */
	eq->iqid = sc->sge.fwq.cntxt_id;

	bzero(&c, sizeof(c));

	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) |
	    V_FW_EQ_CTRL_CMD_VFN(0));
	c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC |
	    F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c));
	c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); /* XXX */
	c.physeqid_pkd = htobe32(0);
	c.fetchszm_to_iqid =
	    htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
	    V_FW_EQ_CTRL_CMD_PCIECHN(idx) |
	    V_FW_EQ_CTRL_CMD_IQID(eq->iqid));
	c.dcaen_to_eqsize =
	    htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
	    V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
	    V_FW_EQ_CTRL_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
	    V_FW_EQ_CTRL_CMD_EQSIZE(eq->qsize));
	c.eqaddr = htobe64(eq->ba);

	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to create control queue %d: %d\n", idx, rc);
		return (rc);
	}

	eq->pidx = eq->cidx = 0;
	eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid));
	eq->flags |= (EQ_ALLOCATED | EQ_STARTED);

	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
	KASSERT(cntxt_id < sc->sge.neq,
	    ("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
	    cntxt_id, sc->sge.neq - 1));
	sc->sge.eqmap[cntxt_id] = eq;

	children = SYSCTL_CHILDREN(sc->oid_ctrlq);

	snprintf(name, sizeof(name), "%d", idx);
	oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, name, CTLFLAG_RD,
	    NULL, "ctrl queue");
	children = SYSCTL_CHILDREN(oid);

	SYSCTL_ADD_UQUAD(&sc->ctx, children, OID_AUTO, "total_wrs", CTLFLAG_RD,
	    &ctrlq->total_wrs, "total # of work requests");
	SYSCTL_ADD_UINT(&sc->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
	    &ctrlq->no_desc, 0,
	    "# of times ctrlq ran out of hardware descriptors");
	SYSCTL_ADD_UINT(&sc->ctx, children, OID_AUTO, "too_long", CTLFLAG_RD,
	    &ctrlq->too_long, 0, "# of oversized work requests");

	return (rc);
}

static int
free_ctrlq(struct adapter *sc, struct sge_ctrlq *ctrlq)
{
	int rc;
	struct sge_eq *eq = &ctrlq->eq;

	if (eq->flags & (EQ_ALLOCATED | EQ_STARTED)) {
		rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id);
		if (rc != 0) {
			device_printf(sc->dev,
			    "failed to free ctrl queue %p: %d\n", eq, rc);
			return (rc);
		}
		eq->flags &= ~(EQ_ALLOCATED | EQ_STARTED);
	}

	free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc);

	if (mtx_initialized(&eq->eq_lock))
		mtx_destroy(&eq->eq_lock);

	bzero(ctrlq, sizeof(*ctrlq));
	return (0);
}

static int
alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx)
{
	int rc, cntxt_id;
	size_t len;
	struct adapter *sc = pi->adapter;
	struct fw_eq_eth_cmd c;
	struct sge_eq *eq = &txq->eq;
	char name[16];
	struct sysctl_oid *oid;
	struct sysctl_oid_list *children;

	txq->ifp = pi->ifp;
	TASK_INIT(&txq->resume_tx, 0, cxgbe_txq_start, txq);

	mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF);

	len = eq->qsize * TX_EQ_ESIZE;
	rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map,
	    &eq->ba, (void **)&eq->desc);
	if (rc)
		return (rc);

	eq->cap = eq->qsize - SPG_LEN / TX_EQ_ESIZE;
	eq->spg = (void *)&eq->desc[eq->cap];
	eq->avail = eq->cap - 1;	/* one less to avoid cidx = pidx */
	txq->sdesc = malloc(eq->cap * sizeof(struct tx_sdesc), M_CXGBE,
	    M_ZERO | M_WAITOK);
	txq->br = buf_ring_alloc(eq->qsize, M_CXGBE, M_WAITOK, &eq->eq_lock);
	eq->iqid = sc->sge.rxq[pi->first_rxq].iq.cntxt_id;

	rc = bus_dma_tag_create(sc->dmat, 1, 0, BUS_SPACE_MAXADDR,
	    BUS_SPACE_MAXADDR, NULL, NULL, 64 * 1024, TX_SGL_SEGS,
	    BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &txq->tx_tag);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to create tx DMA tag: %d\n", rc);
		return (rc);
	}

	rc = alloc_tx_maps(txq);
	if (rc != 0) {
		device_printf(sc->dev, "failed to setup tx DMA maps: %d\n", rc);
		return (rc);
	}

	bzero(&c, sizeof(c));

	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) |
	    V_FW_EQ_ETH_CMD_VFN(0));
	c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC |
	    F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c));
	c.viid_pkd = htobe32(V_FW_EQ_ETH_CMD_VIID(pi->viid));
	c.fetchszm_to_iqid =
	    htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
	    V_FW_EQ_ETH_CMD_PCIECHN(pi->tx_chan) |
	    V_FW_EQ_ETH_CMD_IQID(eq->iqid));
	c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
	    V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
	    V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
	    V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize));
	c.eqaddr = htobe64(eq->ba);

	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
	if (rc != 0) {
		device_printf(pi->dev,
		    "failed to create egress queue: %d\n", rc);
		return (rc);
	}

	eq->pidx = eq->cidx = 0;
	eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd));
	eq->flags |= (EQ_ALLOCATED | EQ_STARTED);

	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
	KASSERT(cntxt_id < sc->sge.neq,
	    ("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
	    cntxt_id, sc->sge.neq - 1));
	sc->sge.eqmap[cntxt_id] = eq;

	children = SYSCTL_CHILDREN(pi->oid_txq);

	snprintf(name, sizeof(name), "%d", idx);
	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
	    NULL, "tx queue");
	children = SYSCTL_CHILDREN(oid);

	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD,
	    &txq->txcsum, "# of times hardware assisted with checksum");
	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_insertion",
	    CTLFLAG_RD, &txq->vlan_insertion,
	    "# of times hardware inserted 802.1Q tag");
	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD,
	    &txq->tso_wrs, "# of IPv4 TSO work requests");
	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD,
	    &txq->imm_wrs, "# of work requests with immediate data");
	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD,
	    &txq->sgl_wrs, "# of work requests with direct SGL");
	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD,
	    &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)");
	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_wrs", CTLFLAG_RD,
	    &txq->txpkts_wrs, "# of txpkts work requests (multiple pkts/WR)");
	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_pkts", CTLFLAG_RD,
	    &txq->txpkts_pkts, "# of frames tx'd using txpkts work requests");

	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_dmamap", CTLFLAG_RD,
	    &txq->no_dmamap, 0, "# of times txq ran out of DMA maps");
	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
	    &txq->no_desc, 0, "# of times txq ran out of hardware descriptors");
	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "egr_update", CTLFLAG_RD,
	    &txq->egr_update, 0, "egress update notifications from the SGE");

	return (rc);
}

static int
free_txq(struct port_info *pi, struct sge_txq *txq)
{
	int rc;
	struct adapter *sc = pi->adapter;
	struct sge_eq *eq = &txq->eq;

	if (eq->flags & (EQ_ALLOCATED | EQ_STARTED)) {

		/*
		 * Wait for the response to a credit flush if there's one
		 * pending.  Clearing the flag tells handle_sge_egr_update or
		 * cxgbe_txq_start (depending on how far the response has made
		 * it) that they should ignore the response and wake up free_txq
		 * instead.
		 *
		 * The interface has been marked down by the time we get here
		 * (both IFF_UP and IFF_DRV_RUNNING cleared).  qflush has
		 * emptied the tx buf_rings and we know nothing new is being
		 * queued for tx so we don't have to worry about a new credit
		 * flush request.
		 */
		TXQ_LOCK(txq);
		if (eq->flags & EQ_CRFLUSHED) {
			eq->flags &= ~EQ_CRFLUSHED;
			msleep(txq, &eq->eq_lock, 0, "crflush", 0);
		}
		TXQ_UNLOCK(txq);

		rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id);
		if (rc != 0) {
			device_printf(pi->dev,
			    "failed to free egress queue %p: %d\n", eq, rc);
			return (rc);
		}
		eq->flags &= ~(EQ_ALLOCATED | EQ_STARTED);
	}

	free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc);

	free(txq->sdesc, M_CXGBE);

	if (txq->maps)
		free_tx_maps(txq);

	buf_ring_free(txq->br, M_CXGBE);

	if (txq->tx_tag)
		bus_dma_tag_destroy(txq->tx_tag);

	if (mtx_initialized(&eq->eq_lock))
		mtx_destroy(&eq->eq_lock);

	bzero(txq, sizeof(*txq));
	return (0);
}

static void
oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	bus_addr_t *ba = arg;

	KASSERT(nseg == 1,
	    ("%s meant for single segment mappings only.", __func__));

	*ba = error ? 0 : segs->ds_addr;
}

static inline bool
is_new_response(const struct sge_iq *iq, struct rsp_ctrl **ctrl)
{
	*ctrl = (void *)((uintptr_t)iq->cdesc +
	    (iq->esize - sizeof(struct rsp_ctrl)));

	return (((*ctrl)->u.type_gen >> S_RSPD_GEN) == iq->gen);
}

static inline void
iq_next(struct sge_iq *iq)
{
	iq->cdesc = (void *) ((uintptr_t)iq->cdesc + iq->esize);
	if (__predict_false(++iq->cidx == iq->qsize - 1)) {
		iq->cidx = 0;
		iq->gen ^= 1;
		iq->cdesc = iq->desc;
	}
}

#define FL_HW_IDX(x) ((x) >> 3)
static inline void
ring_fl_db(struct adapter *sc, struct sge_fl *fl)
{
	int ndesc = fl->pending / 8;

	if (FL_HW_IDX(fl->pidx) == FL_HW_IDX(fl->cidx))
		ndesc--;	/* hold back one credit */

	if (ndesc <= 0)
		return;		/* nothing to do */

	wmb();

	t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), F_DBPRIO |
	    V_QID(fl->cntxt_id) | V_PIDX(ndesc));
	fl->pending -= ndesc * 8;
}

/*
 * Fill up the freelist by up to nbufs and ring its doorbell if the number of
 * buffers ready to be handed to the hardware >= dbthresh.
 */
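/*
 * As used in this file: alloc_iq_fl primes a new freelist with
 * refill_fl(sc, fl, -1, 8) (nbufs < 0 means "as many as needed"), while the
 * rx path tops it up in smaller batches with a larger doorbell threshold.
 */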
static void
refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs, int dbthresh)
{
	__be64 *d = &fl->desc[fl->pidx];
	struct fl_sdesc *sd = &fl->sdesc[fl->pidx];
	bus_dma_tag_t tag;
	bus_addr_t pa;
	caddr_t cl;
	int rc;

	FL_LOCK_ASSERT_OWNED(fl);

	if (nbufs < 0 || nbufs > fl->needed)
		nbufs = fl->needed;

	while (nbufs--) {

		if (sd->cl != NULL) {

			/*
			 * This happens when a frame small enough to fit
			 * entirely in an mbuf was received in cl last time.
			 * We'd held on to cl and can reuse it now.  Note that
			 * we reuse a cluster of the old size if fl->tag_idx is
			 * no longer the same as sd->tag_idx.
			 */

			KASSERT(*d == sd->ba_tag,
			    ("%s: recycling problem at pidx %d",
			    __func__, fl->pidx));

			d++;
			goto recycled;
		}

		if (fl->tag_idx != sd->tag_idx) {
			bus_dmamap_t map;
			bus_dma_tag_t newtag = fl->tag[fl->tag_idx];
			bus_dma_tag_t oldtag = fl->tag[sd->tag_idx];

			/*
			 * An MTU change can get us here.  Discard the old map
			 * which was created with the old tag, but only if
			 * we're able to get a new one.
			 */
			rc = bus_dmamap_create(newtag, 0, &map);
			if (rc == 0) {
				bus_dmamap_destroy(oldtag, sd->map);
				sd->map = map;
				sd->tag_idx = fl->tag_idx;
			}
		}

		tag = fl->tag[sd->tag_idx];

		cl = m_cljget(NULL, M_NOWAIT, FL_BUF_SIZE(sd->tag_idx));
		if (cl == NULL)
			break;

		rc = bus_dmamap_load(tag, sd->map, cl, FL_BUF_SIZE(sd->tag_idx),
		    oneseg_dma_callback, &pa, 0);
		if (rc != 0 || pa == 0) {
			fl->dmamap_failed++;
			uma_zfree(FL_BUF_ZONE(sd->tag_idx), cl);
			break;
		}

		sd->cl = cl;
		*d++ = htobe64(pa | sd->tag_idx);

#ifdef INVARIANTS
		sd->ba_tag = htobe64(pa | sd->tag_idx);
#endif

recycled:
		/* sd->m is never recycled, should always be NULL */
		KASSERT(sd->m == NULL, ("%s: stray mbuf", __func__));

		sd->m = m_gethdr(M_NOWAIT, MT_NOINIT);
		if (sd->m == NULL)
			break;

		fl->pending++;
		fl->needed--;
		sd++;
		if (++fl->pidx == fl->cap) {
			fl->pidx = 0;
			sd = fl->sdesc;
			d = fl->desc;
		}
	}

	if (fl->pending >= dbthresh)
		ring_fl_db(sc, fl);
}

static int
alloc_fl_sdesc(struct sge_fl *fl)
{
	struct fl_sdesc *sd;
	bus_dma_tag_t tag;
	int i, rc;

	FL_LOCK_ASSERT_OWNED(fl);

	fl->sdesc = malloc(fl->cap * sizeof(struct fl_sdesc), M_CXGBE,
	    M_ZERO | M_WAITOK);

	tag = fl->tag[fl->tag_idx];
	sd = fl->sdesc;
	for (i = 0; i < fl->cap; i++, sd++) {

		sd->tag_idx = fl->tag_idx;
		rc = bus_dmamap_create(tag, 0, &sd->map);
		if (rc != 0)
			goto failed;
	}

	return (0);
failed:
	while (--i >= 0) {
		sd--;
		bus_dmamap_destroy(tag, sd->map);
		if (sd->m) {
			m_init(sd->m, NULL, 0, M_NOWAIT, MT_DATA, 0);
			m_free(sd->m);
			sd->m = NULL;
		}
	}
	KASSERT(sd == fl->sdesc, ("%s: EDOOFUS", __func__));

	free(fl->sdesc, M_CXGBE);
	fl->sdesc = NULL;

	return (rc);
}

static void
free_fl_sdesc(struct sge_fl *fl)
{
	struct fl_sdesc *sd;
	int i;

	FL_LOCK_ASSERT_OWNED(fl);

	sd = fl->sdesc;
	for (i = 0; i < fl->cap; i++, sd++) {

		if (sd->m) {
			m_init(sd->m, NULL, 0, M_NOWAIT, MT_DATA, 0);
			m_free(sd->m);
			sd->m = NULL;
		}

		if (sd->cl) {
			bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
			uma_zfree(FL_BUF_ZONE(sd->tag_idx), sd->cl);
			sd->cl = NULL;
		}

		bus_dmamap_destroy(fl->tag[sd->tag_idx], sd->map);
	}

	free(fl->sdesc, M_CXGBE);
	fl->sdesc = NULL;
}

static int
alloc_tx_maps(struct sge_txq *txq)
{
	struct tx_map *txm;
	int i, rc, count;

	/*
	 * We can stuff ~10 frames in an 8-descriptor txpkts WR (8 is the SGE
	 * limit for any WR).  txq->no_dmamap events shouldn't occur if maps is
	 * sized for the worst case.
	 */
	count = txq->eq.qsize * 10 / 8;
	txq->map_total = txq->map_avail = count;
	txq->map_cidx = txq->map_pidx = 0;

	txq->maps = malloc(count * sizeof(struct tx_map), M_CXGBE,
	    M_ZERO | M_WAITOK);

	txm = txq->maps;
	for (i = 0; i < count; i++, txm++) {
		rc = bus_dmamap_create(txq->tx_tag, 0, &txm->map);
		if (rc != 0)
			goto failed;
	}

	return (0);
failed:
	while (--i >= 0) {
		txm--;
		bus_dmamap_destroy(txq->tx_tag, txm->map);
	}
	KASSERT(txm == txq->maps, ("%s: EDOOFUS", __func__));

	free(txq->maps, M_CXGBE);
	txq->maps = NULL;

	return (rc);
}

static void
free_tx_maps(struct sge_txq *txq)
{
	struct tx_map *txm;
	int i;

	txm = txq->maps;
	for (i = 0; i < txq->map_total; i++, txm++) {

		if (txm->m) {
			bus_dmamap_unload(txq->tx_tag, txm->map);
			m_freem(txm->m);
			txm->m = NULL;
		}

		bus_dmamap_destroy(txq->tx_tag, txm->map);
	}

	free(txq->maps, M_CXGBE);
	txq->maps = NULL;
}

/*
 * We'll do immediate data tx for non-TSO, but only when not coalescing.  We're
 * willing to use up to 2 hardware descriptors which means a maximum of 96 bytes
 * of immediate data.
 */
#define IMM_LEN ( \
    2 * TX_EQ_ESIZE \
    - sizeof(struct fw_eth_tx_pkt_wr) \
    - sizeof(struct cpl_tx_pkt_core))

/*
 * Returns non-zero on failure, no need to cleanup anything in that case.
 *
 * Note 1: We always try to defrag the mbuf if required and return EFBIG only
 * if the resulting chain still won't fit in a tx descriptor.
 *
 * Note 2: We'll pullup the mbuf chain if TSO is requested and the first mbuf
 * does not have the TCP header in it.
 */
static int
get_pkt_sgl(struct sge_txq *txq, struct mbuf **fp, struct sgl *sgl,
    int sgl_only)
{
	struct mbuf *m = *fp;
	struct tx_map *txm;
	int rc, defragged = 0, n;

	TXQ_LOCK_ASSERT_OWNED(txq);

	if (m->m_pkthdr.tso_segsz)
		sgl_only = 1;	/* Do not allow immediate data with LSO */

start:	sgl->nsegs = 0;

	if (m->m_pkthdr.len <= IMM_LEN && !sgl_only)
		return (0);	/* nsegs = 0 tells caller to use imm. tx */

	if (txq->map_avail == 0) {
		txq->no_dmamap++;
		return (ENOMEM);
	}
	txm = &txq->maps[txq->map_pidx];

	if (m->m_pkthdr.tso_segsz && m->m_len < 50) {
		*fp = m_pullup(m, 50);
		m = *fp;
		if (m == NULL)
			return (ENOBUFS);
	}

	rc = bus_dmamap_load_mbuf_sg(txq->tx_tag, txm->map, m, sgl->seg,
	    &sgl->nsegs, BUS_DMA_NOWAIT);
	if (rc == EFBIG && defragged == 0) {
		m = m_defrag(m, M_DONTWAIT);
		if (m == NULL)
			return (EFBIG);

		defragged = 1;
		*fp = m;
		goto start;
	}
	if (rc != 0)
		return (rc);

	txm->m = m;
	txq->map_avail--;
	if (++txq->map_pidx == txq->map_total)
		txq->map_pidx = 0;

	KASSERT(sgl->nsegs > 0 && sgl->nsegs <= TX_SGL_SEGS,
	    ("%s: bad DMA mapping (%d segments)", __func__, sgl->nsegs));

	/*
	 * Store the # of flits required to hold this frame's SGL in nflits.  An
	 * SGL has a (ULPTX header + len0, addr0) tuple optionally followed by
	 * multiple (len0 + len1, addr0, addr1) tuples.  If addr1 is not used
	 * then len1 must be set to 0.
	 */
	n = sgl->nsegs - 1;
	sgl->nflits = (3 * n) / 2 + (n & 1) + 2;

	return (0);
}

/*
 * Releases all the txq resources used up in the specified sgl.
 */
static int
free_pkt_sgl(struct sge_txq *txq, struct sgl *sgl)
{
	struct tx_map *txm;

	TXQ_LOCK_ASSERT_OWNED(txq);

	if (sgl->nsegs == 0)
		return (0);	/* didn't use any map */

	/* 1 pkt uses exactly 1 map, back it out */

	txq->map_avail++;
	if (txq->map_pidx > 0)
		txq->map_pidx--;
	else
		txq->map_pidx = txq->map_total - 1;

	txm = &txq->maps[txq->map_pidx];
	bus_dmamap_unload(txq->tx_tag, txm->map);
	txm->m = NULL;

	return (0);
}

static int
write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, struct mbuf *m,
    struct sgl *sgl)
{
	struct sge_eq *eq = &txq->eq;
	struct fw_eth_tx_pkt_wr *wr;
	struct cpl_tx_pkt_core *cpl;
	uint32_t ctrl;	/* used in many unrelated places */
	uint64_t ctrl1;
	int nflits, ndesc, pktlen;
	struct tx_sdesc *txsd;
	caddr_t dst;

	TXQ_LOCK_ASSERT_OWNED(txq);

	pktlen = m->m_pkthdr.len;

	/*
	 * Do we have enough flits to send this frame out?
	 */
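	/*
	 * Illustrative example of the accounting below: a non-LSO frame whose
	 * SGL has 4 segments needed, per get_pkt_sgl, n = nsegs - 1 = 3 and
	 * nflits = (3 * 3) / 2 + (3 & 1) + 2 = 7 flits for the SGL; the
	 * TXPKT_WR_HDR flits are added on top and the total is rounded up to
	 * 8-flit (64-byte) hardware descriptors by howmany(nflits, 8).
	 */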
2067 */ 2068 ctrl = sizeof(struct cpl_tx_pkt_core); 2069 if (m->m_pkthdr.tso_segsz) { 2070 nflits = TXPKT_LSO_WR_HDR; 2071 ctrl += sizeof(struct cpl_tx_pkt_lso); 2072 } else 2073 nflits = TXPKT_WR_HDR; 2074 if (sgl->nsegs > 0) 2075 nflits += sgl->nflits; 2076 else { 2077 nflits += howmany(pktlen, 8); 2078 ctrl += pktlen; 2079 } 2080 ndesc = howmany(nflits, 8); 2081 if (ndesc > eq->avail) 2082 return (ENOMEM); 2083 2084 /* Firmware work request header */ 2085 wr = (void *)&eq->desc[eq->pidx]; 2086 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | 2087 V_FW_WR_IMMDLEN(ctrl)); 2088 ctrl = V_FW_WR_LEN16(howmany(nflits, 2)); 2089 if (eq->avail == ndesc && !(eq->flags & EQ_CRFLUSHED)) { 2090 ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ; 2091 eq->flags |= EQ_CRFLUSHED; 2092 } 2093 2094 wr->equiq_to_len16 = htobe32(ctrl); 2095 wr->r3 = 0; 2096 2097 if (m->m_pkthdr.tso_segsz) { 2098 struct cpl_tx_pkt_lso *lso = (void *)(wr + 1); 2099 struct ether_header *eh; 2100 struct ip *ip; 2101 struct tcphdr *tcp; 2102 2103 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE | 2104 F_LSO_LAST_SLICE; 2105 2106 eh = mtod(m, struct ether_header *); 2107 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2108 ctrl |= V_LSO_ETHHDR_LEN(1); 2109 ip = (void *)((struct ether_vlan_header *)eh + 1); 2110 } else 2111 ip = (void *)(eh + 1); 2112 2113 tcp = (void *)((uintptr_t)ip + ip->ip_hl * 4); 2114 ctrl |= V_LSO_IPHDR_LEN(ip->ip_hl) | 2115 V_LSO_TCPHDR_LEN(tcp->th_off); 2116 2117 lso->lso_ctrl = htobe32(ctrl); 2118 lso->ipid_ofst = htobe16(0); 2119 lso->mss = htobe16(m->m_pkthdr.tso_segsz); 2120 lso->seqno_offset = htobe32(0); 2121 lso->len = htobe32(pktlen); 2122 2123 cpl = (void *)(lso + 1); 2124 2125 txq->tso_wrs++; 2126 } else 2127 cpl = (void *)(wr + 1); 2128 2129 /* Checksum offload */ 2130 ctrl1 = 0; 2131 if (!(m->m_pkthdr.csum_flags & CSUM_IP)) 2132 ctrl1 |= F_TXPKT_IPCSUM_DIS; 2133 if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))) 2134 ctrl1 |= F_TXPKT_L4CSUM_DIS; 2135 if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP)) 2136 txq->txcsum++; /* some hardware assistance provided */ 2137 2138 /* VLAN tag insertion */ 2139 if (m->m_flags & M_VLANTAG) { 2140 ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag); 2141 txq->vlan_insertion++; 2142 } 2143 2144 /* CPL header */ 2145 cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | 2146 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf)); 2147 cpl->pack = 0; 2148 cpl->len = htobe16(pktlen); 2149 cpl->ctrl1 = htobe64(ctrl1); 2150 2151 /* Software descriptor */ 2152 txsd = &txq->sdesc[eq->pidx]; 2153 txsd->desc_used = ndesc; 2154 2155 eq->pending += ndesc; 2156 eq->avail -= ndesc; 2157 eq->pidx += ndesc; 2158 if (eq->pidx >= eq->cap) 2159 eq->pidx -= eq->cap; 2160 2161 /* SGL */ 2162 dst = (void *)(cpl + 1); 2163 if (sgl->nsegs > 0) { 2164 txsd->credits = 1; 2165 txq->sgl_wrs++; 2166 write_sgl_to_txd(eq, sgl, &dst); 2167 } else { 2168 txsd->credits = 0; 2169 txq->imm_wrs++; 2170 for (; m; m = m->m_next) { 2171 copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len); 2172#ifdef INVARIANTS 2173 pktlen -= m->m_len; 2174#endif 2175 } 2176#ifdef INVARIANTS 2177 KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen)); 2178#endif 2179 2180 } 2181 2182 txq->txpkt_wrs++; 2183 return (0); 2184} 2185 2186/* 2187 * Returns 0 to indicate that m has been accepted into a coalesced tx work 2188 * request. 
It has either been folded into txpkts or txpkts was flushed and m 2189 * has started a new coalesced work request (as the first frame in a fresh 2190 * txpkts). 2191 * 2192 * Returns non-zero to indicate a failure - caller is responsible for 2193 * transmitting m, if there was anything in txpkts it has been flushed. 2194 */ 2195static int 2196add_to_txpkts(struct port_info *pi, struct sge_txq *txq, struct txpkts *txpkts, 2197 struct mbuf *m, struct sgl *sgl) 2198{ 2199 struct sge_eq *eq = &txq->eq; 2200 int can_coalesce; 2201 struct tx_sdesc *txsd; 2202 int flits; 2203 2204 TXQ_LOCK_ASSERT_OWNED(txq); 2205 2206 if (txpkts->npkt > 0) { 2207 flits = TXPKTS_PKT_HDR + sgl->nflits; 2208 can_coalesce = m->m_pkthdr.tso_segsz == 0 && 2209 txpkts->nflits + flits <= TX_WR_FLITS && 2210 txpkts->nflits + flits <= eq->avail * 8 && 2211 txpkts->plen + m->m_pkthdr.len < 65536; 2212 2213 if (can_coalesce) { 2214 txpkts->npkt++; 2215 txpkts->nflits += flits; 2216 txpkts->plen += m->m_pkthdr.len; 2217 2218 txsd = &txq->sdesc[eq->pidx]; 2219 txsd->credits++; 2220 2221 return (0); 2222 } 2223 2224 /* 2225 * Couldn't coalesce m into txpkts. The first order of business 2226 * is to send txpkts on its way. Then we'll revisit m. 2227 */ 2228 write_txpkts_wr(txq, txpkts); 2229 } 2230 2231 /* 2232 * Check if we can start a new coalesced tx work request with m as 2233 * the first packet in it. 2234 */ 2235 2236 KASSERT(txpkts->npkt == 0, ("%s: txpkts not empty", __func__)); 2237 2238 flits = TXPKTS_WR_HDR + sgl->nflits; 2239 can_coalesce = m->m_pkthdr.tso_segsz == 0 && 2240 flits <= eq->avail * 8 && flits <= TX_WR_FLITS; 2241 2242 if (can_coalesce == 0) 2243 return (EINVAL); 2244 2245 /* 2246 * Start a fresh coalesced tx WR with m as the first frame in it. 2247 */ 2248 txpkts->npkt = 1; 2249 txpkts->nflits = flits; 2250 txpkts->flitp = &eq->desc[eq->pidx].flit[2]; 2251 txpkts->plen = m->m_pkthdr.len; 2252 2253 txsd = &txq->sdesc[eq->pidx]; 2254 txsd->credits = 1; 2255 2256 return (0); 2257} 2258 2259/* 2260 * Note that write_txpkts_wr can never run out of hardware descriptors (but 2261 * write_txpkt_wr can). add_to_txpkts ensures that a frame is accepted for 2262 * coalescing only if sufficient hardware descriptors are available. 
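 *
 * Concretely: add_to_txpkts admits a frame only while
 * txpkts->nflits + flits <= eq->avail * 8, so by the time this routine runs,
 * ndesc = howmany(txpkts->nflits, 8) cannot exceed eq->avail (the KASSERT
 * below re-checks this).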
2263 */ 2264static void 2265write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts) 2266{ 2267 struct sge_eq *eq = &txq->eq; 2268 struct fw_eth_tx_pkts_wr *wr; 2269 struct tx_sdesc *txsd; 2270 uint32_t ctrl; 2271 int ndesc; 2272 2273 TXQ_LOCK_ASSERT_OWNED(txq); 2274 2275 ndesc = howmany(txpkts->nflits, 8); 2276 2277 wr = (void *)&eq->desc[eq->pidx]; 2278 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR) | 2279 V_FW_WR_IMMDLEN(0)); /* immdlen does not matter in this WR */ 2280 ctrl = V_FW_WR_LEN16(howmany(txpkts->nflits, 2)); 2281 if (eq->avail == ndesc && !(eq->flags & EQ_CRFLUSHED)) { 2282 ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ; 2283 eq->flags |= EQ_CRFLUSHED; 2284 } 2285 wr->equiq_to_len16 = htobe32(ctrl); 2286 wr->plen = htobe16(txpkts->plen); 2287 wr->npkt = txpkts->npkt; 2288 wr->r3 = wr->r4 = 0; 2289 2290 /* Everything else already written */ 2291 2292 txsd = &txq->sdesc[eq->pidx]; 2293 txsd->desc_used = ndesc; 2294 2295 KASSERT(eq->avail >= ndesc, ("%s: out of descriptors", __func__)); 2296 2297 eq->pending += ndesc; 2298 eq->avail -= ndesc; 2299 eq->pidx += ndesc; 2300 if (eq->pidx >= eq->cap) 2301 eq->pidx -= eq->cap; 2302 2303 txq->txpkts_pkts += txpkts->npkt; 2304 txq->txpkts_wrs++; 2305 txpkts->npkt = 0; /* emptied */ 2306} 2307 2308static inline void 2309write_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq, 2310 struct txpkts *txpkts, struct mbuf *m, struct sgl *sgl) 2311{ 2312 struct ulp_txpkt *ulpmc; 2313 struct ulptx_idata *ulpsc; 2314 struct cpl_tx_pkt_core *cpl; 2315 struct sge_eq *eq = &txq->eq; 2316 uintptr_t flitp, start, end; 2317 uint64_t ctrl; 2318 caddr_t dst; 2319 2320 KASSERT(txpkts->npkt > 0, ("%s: txpkts is empty", __func__)); 2321 2322 start = (uintptr_t)eq->desc; 2323 end = (uintptr_t)eq->spg; 2324 2325 /* Checksum offload */ 2326 ctrl = 0; 2327 if (!(m->m_pkthdr.csum_flags & CSUM_IP)) 2328 ctrl |= F_TXPKT_IPCSUM_DIS; 2329 if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))) 2330 ctrl |= F_TXPKT_L4CSUM_DIS; 2331 if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP)) 2332 txq->txcsum++; /* some hardware assistance provided */ 2333 2334 /* VLAN tag insertion */ 2335 if (m->m_flags & M_VLANTAG) { 2336 ctrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag); 2337 txq->vlan_insertion++; 2338 } 2339 2340 /* 2341 * The previous packet's SGL must have ended at a 16 byte boundary (this 2342 * is required by the firmware/hardware). It follows that flitp cannot 2343 * wrap around between the ULPTX master command and ULPTX subcommand (8 2344 * bytes each), and that it can not wrap around in the middle of the 2345 * cpl_tx_pkt_core either. 
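 *
 * (For reference, with the sizes defined in the shared T4 headers:
 * struct ulp_txpkt and struct ulptx_idata are 8 bytes each and
 * struct cpl_tx_pkt_core is 16 bytes, so each piece fills out whole 16 byte
 * chunks and the only spots where a wrap is possible are the two boundaries
 * checked against "end" below.)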
2346 */ 2347 flitp = (uintptr_t)txpkts->flitp; 2348 KASSERT((flitp & 0xf) == 0, 2349 ("%s: last SGL did not end at 16 byte boundary: %p", 2350 __func__, txpkts->flitp)); 2351 2352 /* ULP master command */ 2353 ulpmc = (void *)flitp; 2354 ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0) | 2355 V_ULP_TXPKT_FID(eq->iqid)); 2356 ulpmc->len = htonl(howmany(sizeof(*ulpmc) + sizeof(*ulpsc) + 2357 sizeof(*cpl) + 8 * sgl->nflits, 16)); 2358 2359 /* ULP subcommand */ 2360 ulpsc = (void *)(ulpmc + 1); 2361 ulpsc->cmd_more = htobe32(V_ULPTX_CMD((u32)ULP_TX_SC_IMM) | 2362 F_ULP_TX_SC_MORE); 2363 ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core)); 2364 2365 flitp += sizeof(*ulpmc) + sizeof(*ulpsc); 2366 if (flitp == end) 2367 flitp = start; 2368 2369 /* CPL_TX_PKT */ 2370 cpl = (void *)flitp; 2371 cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | 2372 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf)); 2373 cpl->pack = 0; 2374 cpl->len = htobe16(m->m_pkthdr.len); 2375 cpl->ctrl1 = htobe64(ctrl); 2376 2377 flitp += sizeof(*cpl); 2378 if (flitp == end) 2379 flitp = start; 2380 2381 /* SGL for this frame */ 2382 dst = (caddr_t)flitp; 2383 txpkts->nflits += write_sgl_to_txd(eq, sgl, &dst); 2384 txpkts->flitp = (void *)dst; 2385 2386 KASSERT(((uintptr_t)dst & 0xf) == 0, 2387 ("%s: SGL ends at %p (not a 16 byte boundary)", __func__, dst)); 2388} 2389 2390/* 2391 * If the SGL ends on an address that is not 16 byte aligned, this function will 2392 * add a 0 filled flit at the end. It returns 1 in that case. 2393 */ 2394static int 2395write_sgl_to_txd(struct sge_eq *eq, struct sgl *sgl, caddr_t *to) 2396{ 2397 __be64 *flitp, *end; 2398 struct ulptx_sgl *usgl; 2399 bus_dma_segment_t *seg; 2400 int i, padded; 2401 2402 KASSERT(sgl->nsegs > 0 && sgl->nflits > 0, 2403 ("%s: bad SGL - nsegs=%d, nflits=%d", 2404 __func__, sgl->nsegs, sgl->nflits)); 2405 2406 KASSERT(((uintptr_t)(*to) & 0xf) == 0, 2407 ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to)); 2408 2409 flitp = (__be64 *)(*to); 2410 end = flitp + sgl->nflits; 2411 seg = &sgl->seg[0]; 2412 usgl = (void *)flitp; 2413 2414 /* 2415 * We start at a 16 byte boundary somewhere inside the tx descriptor 2416 * ring, so we're at least 16 bytes away from the status page. There is 2417 * no chance of a wrap around in the middle of usgl (which is 16 bytes). 
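 *
 * Layout written below, for reference: the ulptx_sgl header carries
 * (cmd_nsge, len0, addr0) in 16 bytes, and every further pair of segments
 * takes a 24-byte (len[0], len[1], addr[0], addr[1]) tuple.  E.g. 3 segments
 * occupy 16 + 24 bytes = 5 flits, matching the nflits formula in
 * get_pkt_sgl.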
2418 */ 2419 2420 usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | 2421 V_ULPTX_NSGE(sgl->nsegs)); 2422 usgl->len0 = htobe32(seg->ds_len); 2423 usgl->addr0 = htobe64(seg->ds_addr); 2424 seg++; 2425 2426 if ((uintptr_t)end <= (uintptr_t)eq->spg) { 2427 2428 /* Won't wrap around at all */ 2429 2430 for (i = 0; i < sgl->nsegs - 1; i++, seg++) { 2431 usgl->sge[i / 2].len[i & 1] = htobe32(seg->ds_len); 2432 usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ds_addr); 2433 } 2434 if (i & 1) 2435 usgl->sge[i / 2].len[1] = htobe32(0); 2436 } else { 2437 2438 /* Will wrap somewhere in the rest of the SGL */ 2439 2440 /* 2 flits already written, write the rest flit by flit */ 2441 flitp = (void *)(usgl + 1); 2442 for (i = 0; i < sgl->nflits - 2; i++) { 2443 if ((uintptr_t)flitp == (uintptr_t)eq->spg) 2444 flitp = (void *)eq->desc; 2445 *flitp++ = get_flit(seg, sgl->nsegs - 1, i); 2446 } 2447 end = flitp; 2448 } 2449 2450 if ((uintptr_t)end & 0xf) { 2451 *(uint64_t *)end = 0; 2452 end++; 2453 padded = 1; 2454 } else 2455 padded = 0; 2456 2457 if ((uintptr_t)end == (uintptr_t)eq->spg) 2458 *to = (void *)eq->desc; 2459 else 2460 *to = (void *)end; 2461 2462 return (padded); 2463} 2464 2465static inline void 2466copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len) 2467{ 2468 if ((uintptr_t)(*to) + len <= (uintptr_t)eq->spg) { 2469 bcopy(from, *to, len); 2470 (*to) += len; 2471 } else { 2472 int portion = (uintptr_t)eq->spg - (uintptr_t)(*to); 2473 2474 bcopy(from, *to, portion); 2475 from += portion; 2476 portion = len - portion; /* remaining */ 2477 bcopy(from, (void *)eq->desc, portion); 2478 (*to) = (caddr_t)eq->desc + portion; 2479 } 2480} 2481 2482static inline void 2483ring_eq_db(struct adapter *sc, struct sge_eq *eq) 2484{ 2485 wmb(); 2486 t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), 2487 V_QID(eq->cntxt_id) | V_PIDX(eq->pending)); 2488 eq->pending = 0; 2489} 2490 2491static inline int 2492reclaimable(struct sge_eq *eq) 2493{ 2494 unsigned int cidx; 2495 2496 cidx = eq->spg->cidx; /* stable snapshot */ 2497 cidx = be16_to_cpu(cidx); 2498 2499 if (cidx >= eq->cidx) 2500 return (cidx - eq->cidx); 2501 else 2502 return (cidx + eq->cap - eq->cidx); 2503} 2504 2505/* 2506 * There are "can_reclaim" tx descriptors ready to be reclaimed. Reclaim as 2507 * many as possible but stop when there are around "n" mbufs to free. 2508 * 2509 * The actual number reclaimed is provided as the return value. 2510 */ 2511static int 2512reclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n) 2513{ 2514 struct tx_sdesc *txsd; 2515 struct tx_map *txm; 2516 unsigned int reclaimed, maps; 2517 struct sge_eq *eq = &txq->eq; 2518 2519 EQ_LOCK_ASSERT_OWNED(eq); 2520 2521 if (can_reclaim == 0) 2522 can_reclaim = reclaimable(eq); 2523 2524 maps = reclaimed = 0; 2525 while (can_reclaim && maps < n) { 2526 int ndesc; 2527 2528 txsd = &txq->sdesc[eq->cidx]; 2529 ndesc = txsd->desc_used; 2530 2531 /* Firmware doesn't return "partial" credits. 
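 * The descriptors that make up one work request are completed as a unit, so
 * the count read from the status page always covers whole WRs; that is what
 * lets the KASSERT below demand can_reclaim >= txsd->desc_used.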
*/ 2532 KASSERT(can_reclaim >= ndesc, 2533 ("%s: unexpected number of credits: %d, %d", 2534 __func__, can_reclaim, ndesc)); 2535 2536 maps += txsd->credits; 2537 2538 reclaimed += ndesc; 2539 can_reclaim -= ndesc; 2540 2541 eq->cidx += ndesc; 2542 if (__predict_false(eq->cidx >= eq->cap)) 2543 eq->cidx -= eq->cap; 2544 } 2545 2546 txm = &txq->maps[txq->map_cidx]; 2547 if (maps) 2548 prefetch(txm->m); 2549 2550 eq->avail += reclaimed; 2551 KASSERT(eq->avail < eq->cap, /* avail tops out at (cap - 1) */ 2552 ("%s: too many descriptors available", __func__)); 2553 2554 txq->map_avail += maps; 2555 KASSERT(txq->map_avail <= txq->map_total, 2556 ("%s: too many maps available", __func__)); 2557 2558 while (maps--) { 2559 struct tx_map *next; 2560 2561 next = txm + 1; 2562 if (__predict_false(txq->map_cidx + 1 == txq->map_total)) 2563 next = txq->maps; 2564 prefetch(next->m); 2565 2566 bus_dmamap_unload(txq->tx_tag, txm->map); 2567 m_freem(txm->m); 2568 txm->m = NULL; 2569 2570 txm = next; 2571 if (__predict_false(++txq->map_cidx == txq->map_total)) 2572 txq->map_cidx = 0; 2573 } 2574 2575 return (reclaimed); 2576} 2577 2578static void 2579write_eqflush_wr(struct sge_eq *eq) 2580{ 2581 struct fw_eq_flush_wr *wr; 2582 2583 EQ_LOCK_ASSERT_OWNED(eq); 2584 KASSERT(eq->avail > 0, ("%s: no descriptors left.", __func__)); 2585 2586 wr = (void *)&eq->desc[eq->pidx]; 2587 bzero(wr, sizeof(*wr)); 2588 wr->opcode = FW_EQ_FLUSH_WR; 2589 wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(sizeof(*wr) / 16) | 2590 F_FW_WR_EQUEQ | F_FW_WR_EQUIQ); 2591 2592 eq->flags |= EQ_CRFLUSHED; 2593 eq->pending++; 2594 eq->avail--; 2595 if (++eq->pidx == eq->cap) 2596 eq->pidx = 0; 2597} 2598 2599static __be64 2600get_flit(bus_dma_segment_t *sgl, int nsegs, int idx) 2601{ 2602 int i = (idx / 3) * 2; 2603 2604 switch (idx % 3) { 2605 case 0: { 2606 __be64 rc; 2607 2608 rc = htobe32(sgl[i].ds_len); 2609 if (i + 1 < nsegs) 2610 rc |= (uint64_t)htobe32(sgl[i + 1].ds_len) << 32; 2611 2612 return (rc); 2613 } 2614 case 1: 2615 return htobe64(sgl[i].ds_addr); 2616 case 2: 2617 return htobe64(sgl[i + 1].ds_addr); 2618 } 2619 2620 return (0); 2621} 2622 2623static void 2624set_fl_tag_idx(struct sge_fl *fl, int mtu) 2625{ 2626 int i; 2627 2628 FL_LOCK_ASSERT_OWNED(fl); 2629 2630 for (i = 0; i < FL_BUF_SIZES - 1; i++) { 2631 if (FL_BUF_SIZE(i) >= (mtu + FL_PKTSHIFT)) 2632 break; 2633 } 2634 2635 fl->tag_idx = i; 2636} 2637 2638static int 2639handle_sge_egr_update(struct adapter *sc, const struct cpl_sge_egr_update *cpl) 2640{ 2641 unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid)); 2642 struct sge *s = &sc->sge; 2643 struct sge_txq *txq; 2644 struct port_info *pi; 2645 2646 txq = (void *)s->eqmap[qid - s->eq_start]; 2647 TXQ_LOCK(txq); 2648 if (txq->eq.flags & EQ_CRFLUSHED) { 2649 pi = txq->ifp->if_softc; 2650 taskqueue_enqueue(pi->tq, &txq->resume_tx); 2651 txq->egr_update++; 2652 } else 2653 wakeup_one(txq); /* txq is going away, wakeup free_txq */ 2654 TXQ_UNLOCK(txq); 2655 2656 return (0); 2657} 2658 2659/* 2660 * m0 is freed on successful transmission. 
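 *
 * Usage sketch: the caller passes in a fully formed work request in m0 (at
 * most SGE_MAX_WR_LEN bytes).  The chain is copied into the control queue as
 * immediate data and the doorbell is rung.  On failure (EMSGSIZE or EAGAIN
 * below) m0 is not freed and remains the caller's responsibility.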
2661 */ 2662static int 2663ctrl_tx(struct adapter *sc, struct sge_ctrlq *ctrlq, struct mbuf *m0) 2664{ 2665 struct sge_eq *eq = &ctrlq->eq; 2666 int rc = 0, ndesc; 2667 int can_reclaim; 2668 caddr_t dst; 2669 struct mbuf *m; 2670 2671 M_ASSERTPKTHDR(m0); 2672 2673 if (m0->m_pkthdr.len > SGE_MAX_WR_LEN) { 2674 ctrlq->too_long++; 2675 return (EMSGSIZE); 2676 } 2677 ndesc = howmany(m0->m_pkthdr.len, CTRL_EQ_ESIZE); 2678 2679 EQ_LOCK(eq); 2680 2681 can_reclaim = reclaimable(eq); 2682 eq->cidx += can_reclaim; 2683 eq->avail += can_reclaim; 2684 if (__predict_false(eq->cidx >= eq->cap)) 2685 eq->cidx -= eq->cap; 2686 2687 if (eq->avail < ndesc) { 2688 rc = EAGAIN; 2689 ctrlq->no_desc++; 2690 goto failed; 2691 } 2692 2693 dst = (void *)&eq->desc[eq->pidx]; 2694 for (m = m0; m; m = m->m_next) 2695 copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len); 2696 2697 eq->pidx += ndesc; 2698 if (__predict_false(eq->pidx >= eq->cap)) 2699 eq->pidx -= eq->cap; 2700 2701 eq->pending += ndesc; 2702 ctrlq->total_wrs++; 2703 ring_eq_db(sc, eq); 2704failed: 2705 EQ_UNLOCK(eq); 2706 if (rc == 0) 2707 m_freem(m0); 2708 2709 return (rc); 2710} 2711