t4_sge.c revision 218792
1/*- 2 * Copyright (c) 2011 Chelsio Communications, Inc. 3 * All rights reserved. 4 * Written by: Navdeep Parhar <np@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD: head/sys/dev/cxgbe/t4_sge.c 218792 2011-02-18 08:00:26Z np $"); 30 31#include "opt_inet.h" 32 33#include <sys/types.h> 34#include <sys/mbuf.h> 35#include <sys/socket.h> 36#include <sys/kernel.h> 37#include <sys/sysctl.h> 38#include <net/bpf.h> 39#include <net/ethernet.h> 40#include <net/if.h> 41#include <net/if_vlan_var.h> 42#include <netinet/in.h> 43#include <netinet/ip.h> 44#include <netinet/tcp.h> 45 46#include "common/common.h" 47#include "common/t4_regs.h" 48#include "common/t4_regs_values.h" 49#include "common/t4_msg.h" 50#include "common/t4fw_interface.h" 51 52struct fl_buf_info { 53 int size; 54 int type; 55 uma_zone_t zone; 56}; 57 58/* t4_sge_init will fill up the zone */ 59static struct fl_buf_info fl_buf_info[FL_BUF_SIZES] = { 60 { MCLBYTES, EXT_CLUSTER, NULL}, 61 { MJUMPAGESIZE, EXT_JUMBOP, NULL}, 62 { MJUM9BYTES, EXT_JUMBO9, NULL}, 63 { MJUM16BYTES, EXT_JUMBO16, NULL} 64}; 65#define FL_BUF_SIZE(x) (fl_buf_info[x].size) 66#define FL_BUF_TYPE(x) (fl_buf_info[x].type) 67#define FL_BUF_ZONE(x) (fl_buf_info[x].zone) 68 69enum { 70 FL_PKTSHIFT = 2 71}; 72 73#define FL_ALIGN min(CACHE_LINE_SIZE, 32) 74#if CACHE_LINE_SIZE > 64 75#define SPG_LEN 128 76#else 77#define SPG_LEN 64 78#endif 79 80/* Used to track coalesced tx work request */ 81struct txpkts { 82 uint64_t *flitp; /* ptr to flit where next pkt should start */ 83 uint8_t npkt; /* # of packets in this work request */ 84 uint8_t nflits; /* # of flits used by this work request */ 85 uint16_t plen; /* total payload (sum of all packets) */ 86}; 87 88/* A packet's SGL. This + m_pkthdr has all info needed for tx */ 89struct sgl { 90 int nsegs; /* # of segments in the SGL, 0 means imm. 
tx */ 91 int nflits; /* # of flits needed for the SGL */ 92 bus_dma_segment_t seg[TX_SGL_SEGS]; 93}; 94 95static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int, 96 int, iq_intr_handler_t *, char *); 97static inline void init_fl(struct sge_fl *, int, char *); 98static inline void init_txq(struct sge_txq *, int, char *); 99static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *, 100 bus_addr_t *, void **); 101static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t, 102 void *); 103static int alloc_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *, 104 int); 105static int free_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *); 106static int alloc_iq(struct sge_iq *, int); 107static int free_iq(struct sge_iq *); 108static int alloc_rxq(struct port_info *, struct sge_rxq *, int, int); 109static int free_rxq(struct port_info *, struct sge_rxq *); 110static int alloc_txq(struct port_info *, struct sge_txq *, int); 111static int free_txq(struct port_info *, struct sge_txq *); 112static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int); 113static inline bool is_new_response(const struct sge_iq *, struct rsp_ctrl **); 114static inline void iq_next(struct sge_iq *); 115static inline void ring_fl_db(struct adapter *, struct sge_fl *); 116static void refill_fl(struct sge_fl *, int); 117static int alloc_fl_sdesc(struct sge_fl *); 118static void free_fl_sdesc(struct sge_fl *); 119static int alloc_eq_maps(struct sge_eq *); 120static void free_eq_maps(struct sge_eq *); 121static struct mbuf *get_fl_sdesc_data(struct sge_fl *, int, int); 122static void set_fl_tag_idx(struct sge_fl *, int); 123 124static int get_pkt_sgl(struct sge_txq *, struct mbuf **, struct sgl *, int); 125static int free_pkt_sgl(struct sge_txq *, struct sgl *); 126static int write_txpkt_wr(struct port_info *, struct sge_txq *, struct mbuf *, 127 struct sgl *); 128static int add_to_txpkts(struct port_info *, struct sge_txq *, struct txpkts *, 129 struct mbuf *, struct sgl *); 130static void write_txpkts_wr(struct sge_txq *, struct txpkts *); 131static inline void write_ulp_cpl_sgl(struct port_info *, struct sge_txq *, 132 struct txpkts *, struct mbuf *, struct sgl *); 133static int write_sgl_to_txd(struct sge_eq *, struct sgl *, caddr_t *); 134static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int); 135static inline void ring_tx_db(struct adapter *, struct sge_eq *); 136static int reclaim_tx_descs(struct sge_eq *, int, int); 137static void write_eqflush_wr(struct sge_eq *); 138static __be64 get_flit(bus_dma_segment_t *, int, int); 139 140/** 141 * t4_sge_init - initialize SGE 142 * @sc: the adapter 143 * 144 * Performs SGE initialization needed every time after a chip reset. 145 * We do not initialize any of the queues here, instead the driver 146 * top-level must request them individually. 
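 *
 * What gets programmed here: the rx packet shift and ingress padding
 * boundary, the egress status page size, the host page size, the four
 * free list buffer sizes, and the interrupt holdoff timer and
 * packet-count threshold values.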
147 */ 148void 149t4_sge_init(struct adapter *sc) 150{ 151 struct sge *s = &sc->sge; 152 int i; 153 154 FL_BUF_ZONE(0) = zone_clust; 155 FL_BUF_ZONE(1) = zone_jumbop; 156 FL_BUF_ZONE(2) = zone_jumbo9; 157 FL_BUF_ZONE(3) = zone_jumbo16; 158 159 t4_set_reg_field(sc, A_SGE_CONTROL, V_PKTSHIFT(M_PKTSHIFT) | 160 V_INGPADBOUNDARY(M_INGPADBOUNDARY) | 161 F_EGRSTATUSPAGESIZE, 162 V_INGPADBOUNDARY(ilog2(FL_ALIGN) - 5) | 163 V_PKTSHIFT(FL_PKTSHIFT) | 164 F_RXPKTCPLMODE | 165 V_EGRSTATUSPAGESIZE(SPG_LEN == 128)); 166 t4_set_reg_field(sc, A_SGE_HOST_PAGE_SIZE, 167 V_HOSTPAGESIZEPF0(M_HOSTPAGESIZEPF0), 168 V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10)); 169 170 for (i = 0; i < FL_BUF_SIZES; i++) { 171 t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i), 172 FL_BUF_SIZE(i)); 173 } 174 175 t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD, 176 V_THRESHOLD_0(s->counter_val[0]) | 177 V_THRESHOLD_1(s->counter_val[1]) | 178 V_THRESHOLD_2(s->counter_val[2]) | 179 V_THRESHOLD_3(s->counter_val[3])); 180 181 t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1, 182 V_TIMERVALUE0(us_to_core_ticks(sc, s->timer_val[0])) | 183 V_TIMERVALUE1(us_to_core_ticks(sc, s->timer_val[1]))); 184 t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3, 185 V_TIMERVALUE2(us_to_core_ticks(sc, s->timer_val[2])) | 186 V_TIMERVALUE3(us_to_core_ticks(sc, s->timer_val[3]))); 187 t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5, 188 V_TIMERVALUE4(us_to_core_ticks(sc, s->timer_val[4])) | 189 V_TIMERVALUE5(us_to_core_ticks(sc, s->timer_val[5]))); 190} 191 192int 193t4_create_dma_tag(struct adapter *sc) 194{ 195 int rc; 196 197 rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0, 198 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE, 199 BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, 200 NULL, &sc->dmat); 201 if (rc != 0) { 202 device_printf(sc->dev, 203 "failed to create main DMA tag: %d\n", rc); 204 } 205 206 return (rc); 207} 208 209int 210t4_destroy_dma_tag(struct adapter *sc) 211{ 212 if (sc->dmat) 213 bus_dma_tag_destroy(sc->dmat); 214 215 return (0); 216} 217 218/* 219 * Allocate and initialize the firmware event queue and the forwarded interrupt 220 * queues, if any. The adapter owns all these queues as they are not associated 221 * with any particular port. 222 * 223 * Returns errno on failure. Resources allocated up to that point may still be 224 * allocated. Caller is responsible for cleanup in case this function fails. 225 */ 226int 227t4_setup_adapter_iqs(struct adapter *sc) 228{ 229 int i, rc; 230 struct sge_iq *iq, *fwq; 231 iq_intr_handler_t *handler; 232 char name[16]; 233 234 ADAPTER_LOCK_ASSERT_NOTOWNED(sc); 235 236 fwq = &sc->sge.fwq; 237 if (sc->flags & INTR_FWD) { 238 iq = &sc->sge.fiq[0]; 239 240 /* 241 * Forwarded interrupt queues - allocate 1 if there's only 1 242 * vector available, one less than the number of vectors 243 * otherwise (the first vector is reserved for the error 244 * interrupt in that case). 245 */ 246 i = sc->intr_count > 1 ? 
1 : 0; 247 for (; i < sc->intr_count; i++, iq++) { 248 249 snprintf(name, sizeof(name), "%s fiq%d", 250 device_get_nameunit(sc->dev), i); 251 init_iq(iq, sc, 0, 0, (sc->sge.nrxq + 1) * 2, 16, NULL, 252 name); 253 254 rc = alloc_iq(iq, i); 255 if (rc != 0) { 256 device_printf(sc->dev, 257 "failed to create fwd intr queue %d: %d\n", 258 i, rc); 259 return (rc); 260 } 261 } 262 263 handler = t4_intr_evt; 264 i = 0; /* forward fwq's interrupt to the first fiq */ 265 } else { 266 handler = NULL; 267 i = 1; /* fwq should use vector 1 (0 is used by error) */ 268 } 269 270 snprintf(name, sizeof(name), "%s fwq", device_get_nameunit(sc->dev)); 271 init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE, handler, name); 272 rc = alloc_iq(fwq, i); 273 if (rc != 0) { 274 device_printf(sc->dev, 275 "failed to create firmware event queue: %d\n", rc); 276 } 277 278 return (rc); 279} 280 281/* 282 * Idempotent 283 */ 284int 285t4_teardown_adapter_iqs(struct adapter *sc) 286{ 287 int i; 288 struct sge_iq *iq; 289 290 ADAPTER_LOCK_ASSERT_NOTOWNED(sc); 291 292 iq = &sc->sge.fwq; 293 free_iq(iq); 294 if (sc->flags & INTR_FWD) { 295 for (i = 0; i < NFIQ(sc); i++) { 296 iq = &sc->sge.fiq[i]; 297 free_iq(iq); 298 } 299 } 300 301 return (0); 302} 303 304int 305t4_setup_eth_queues(struct port_info *pi) 306{ 307 int rc = 0, i, intr_idx; 308 struct sge_rxq *rxq; 309 struct sge_txq *txq; 310 char name[16]; 311 struct adapter *sc = pi->adapter; 312 313 if (sysctl_ctx_init(&pi->ctx) == 0) { 314 struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev); 315 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 316 317 pi->oid_rxq = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, 318 "rxq", CTLFLAG_RD, NULL, "rx queues"); 319 pi->oid_txq = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, 320 "txq", CTLFLAG_RD, NULL, "tx queues"); 321 } 322 323 for_each_rxq(pi, i, rxq) { 324 325 snprintf(name, sizeof(name), "%s rxq%d-iq", 326 device_get_nameunit(pi->dev), i); 327 init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, 328 pi->qsize_rxq, RX_IQ_ESIZE, 329 sc->flags & INTR_FWD ? 
t4_intr_data: NULL, name); 330 331 snprintf(name, sizeof(name), "%s rxq%d-fl", 332 device_get_nameunit(pi->dev), i); 333 init_fl(&rxq->fl, pi->qsize_rxq / 8, name); 334 335 if (sc->flags & INTR_FWD) 336 intr_idx = (pi->first_rxq + i) % NFIQ(sc); 337 else 338 intr_idx = pi->first_rxq + i + 2; 339 340 rc = alloc_rxq(pi, rxq, intr_idx, i); 341 if (rc != 0) 342 goto done; 343 344 intr_idx++; 345 } 346 347 for_each_txq(pi, i, txq) { 348 349 snprintf(name, sizeof(name), "%s txq%d", 350 device_get_nameunit(pi->dev), i); 351 init_txq(txq, pi->qsize_txq, name); 352 353 rc = alloc_txq(pi, txq, i); 354 if (rc != 0) 355 goto done; 356 } 357 358done: 359 if (rc) 360 t4_teardown_eth_queues(pi); 361 362 return (rc); 363} 364 365/* 366 * Idempotent 367 */ 368int 369t4_teardown_eth_queues(struct port_info *pi) 370{ 371 int i; 372 struct sge_rxq *rxq; 373 struct sge_txq *txq; 374 375 /* Do this before freeing the queues */ 376 if (pi->oid_txq || pi->oid_rxq) { 377 sysctl_ctx_free(&pi->ctx); 378 pi->oid_txq = pi->oid_rxq = NULL; 379 } 380 381 for_each_txq(pi, i, txq) { 382 free_txq(pi, txq); 383 } 384 385 for_each_rxq(pi, i, rxq) { 386 free_rxq(pi, rxq); 387 } 388 389 return (0); 390} 391 392/* Deals with errors and forwarded interrupts */ 393void 394t4_intr_all(void *arg) 395{ 396 struct adapter *sc = arg; 397 398 t4_intr_err(arg); 399 t4_intr_fwd(&sc->sge.fiq[0]); 400} 401 402/* Deals with forwarded interrupts on the given ingress queue */ 403void 404t4_intr_fwd(void *arg) 405{ 406 struct sge_iq *iq = arg, *q; 407 struct adapter *sc = iq->adapter; 408 struct rsp_ctrl *ctrl; 409 int ndesc_pending = 0, ndesc_total = 0; 410 int qid; 411 412 IQ_LOCK(iq); 413 while (is_new_response(iq, &ctrl)) { 414 415 rmb(); 416 417 /* Only interrupt muxing expected on this queue */ 418 KASSERT(G_RSPD_TYPE(ctrl->u.type_gen) == X_RSPD_TYPE_INTR, 419 ("unexpected event on forwarded interrupt queue: %x", 420 G_RSPD_TYPE(ctrl->u.type_gen))); 421 422 qid = ntohl(ctrl->pldbuflen_qid) - sc->sge.iq_start; 423 q = sc->sge.iqmap[qid]; 424 425 q->handler(q); 426 427 ndesc_total++; 428 if (++ndesc_pending >= iq->qsize / 4) { 429 t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), 430 V_CIDXINC(ndesc_pending) | 431 V_INGRESSQID(iq->cntxt_id) | 432 V_SEINTARM( 433 V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); 434 ndesc_pending = 0; 435 } 436 437 iq_next(iq); 438 } 439 IQ_UNLOCK(iq); 440 441 if (ndesc_total > 0) { 442 t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), 443 V_CIDXINC(ndesc_pending) | V_INGRESSQID((u32)iq->cntxt_id) | 444 V_SEINTARM(iq->intr_params)); 445 } 446} 447 448/* Deals with error interrupts */ 449void 450t4_intr_err(void *arg) 451{ 452 struct adapter *sc = arg; 453 454 if (sc->intr_type == 1) 455 t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0); 456 457 t4_slow_intr_handler(sc); 458} 459 460/* Deals with the firmware event queue */ 461void 462t4_intr_evt(void *arg) 463{ 464 struct sge_iq *iq = arg; 465 struct adapter *sc = iq->adapter; 466 struct rsp_ctrl *ctrl; 467 const struct rss_header *rss; 468 int ndesc_pending = 0, ndesc_total = 0; 469 470 KASSERT(iq == &sc->sge.fwq, ("%s: unexpected ingress queue", __func__)); 471 472 IQ_LOCK(iq); 473 while (is_new_response(iq, &ctrl)) { 474 475 rmb(); 476 477 rss = (const void *)iq->cdesc; 478 479 /* Should only get CPL on this queue */ 480 KASSERT(G_RSPD_TYPE(ctrl->u.type_gen) == X_RSPD_TYPE_CPL, 481 ("%s: unexpected type %d", __func__, 482 G_RSPD_TYPE(ctrl->u.type_gen))); 483 484 switch (rss->opcode) { 485 case CPL_FW4_MSG: 486 case CPL_FW6_MSG: { 487 const struct cpl_fw6_msg *cpl; 488 489 cpl = 
(const void *)(rss + 1); 490 if (cpl->type == FW6_TYPE_CMD_RPL) 491 t4_handle_fw_rpl(sc, cpl->data); 492 493 break; 494 } 495 case CPL_SGE_EGR_UPDATE: { 496 const struct cpl_sge_egr_update *cpl; 497 unsigned int qid; 498 struct sge *s = &sc->sge; 499 struct sge_txq *txq; 500 501 cpl = (const void *)(rss + 1); 502 qid = G_EGR_QID(ntohl(cpl->opcode_qid)); 503 txq = (void *)s->eqmap[qid - s->eq_start]; 504 txq->egr_update++; 505 506 /* XXX: wake up stalled tx */ 507 508 break; 509 } 510 511 default: 512 device_printf(sc->dev, 513 "can't handle CPL opcode %d.", rss->opcode); 514 } 515 516 ndesc_total++; 517 if (++ndesc_pending >= iq->qsize / 4) { 518 t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), 519 V_CIDXINC(ndesc_pending) | 520 V_INGRESSQID(iq->cntxt_id) | 521 V_SEINTARM( 522 V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); 523 ndesc_pending = 0; 524 } 525 iq_next(iq); 526 } 527 IQ_UNLOCK(iq); 528 529 if (ndesc_total > 0) { 530 t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), 531 V_CIDXINC(ndesc_pending) | V_INGRESSQID(iq->cntxt_id) | 532 V_SEINTARM(iq->intr_params)); 533 } 534} 535 536void 537t4_intr_data(void *arg) 538{ 539 struct sge_rxq *rxq = arg; 540 struct sge_iq *iq = arg; 541 struct rsp_ctrl *ctrl; 542 struct sge_fl *fl = &rxq->fl; 543 struct port_info *pi = rxq->port; 544 struct ifnet *ifp = pi->ifp; 545 struct adapter *sc = pi->adapter; 546 const struct rss_header *rss; 547 const struct cpl_rx_pkt *cpl; 548 int ndescs = 0, rsp_type; 549 uint32_t len; 550 struct mbuf *m0, *m; 551#ifdef INET 552 struct lro_ctrl *lro = &rxq->lro; 553 struct lro_entry *l; 554#endif 555 556 IQ_LOCK(iq); 557 iq->intr_next = iq->intr_params; 558 while (is_new_response(iq, &ctrl)) { 559 560 rmb(); 561 562 rss = (const void *)iq->cdesc; 563 cpl = (const void *)(rss + 1); 564 565 rsp_type = G_RSPD_TYPE(ctrl->u.type_gen); 566 567 if (__predict_false(rsp_type == X_RSPD_TYPE_CPL)) { 568 const struct cpl_sge_egr_update *p = (const void *)cpl; 569 unsigned int qid = G_EGR_QID(ntohl(p->opcode_qid)); 570 571 KASSERT(cpl->opcode == CPL_SGE_EGR_UPDATE, 572 ("unexpected opcode on data ingress queue: %x", 573 cpl->opcode)); 574 575 /* XXX: noone's waiting to be woken up... 
*/ 576 wakeup(sc->sge.eqmap[qid - sc->sge.eq_start]); 577 578 ndescs++; 579 iq_next(iq); 580 581 continue; 582 } 583 584 KASSERT(G_RSPD_TYPE(ctrl->u.type_gen) == X_RSPD_TYPE_FLBUF, 585 ("unexpected event on data ingress queue: %x", 586 G_RSPD_TYPE(ctrl->u.type_gen))); 587 588 len = be32toh(ctrl->pldbuflen_qid); 589 590 KASSERT(len & F_RSPD_NEWBUF, 591 ("%s: T4 misconfigured to pack buffers.", __func__)); 592 593 len = G_RSPD_LEN(len); 594 m0 = get_fl_sdesc_data(fl, len, M_PKTHDR); 595 if (m0 == NULL) { 596 iq->intr_next = V_QINTR_TIMER_IDX(SGE_NTIMERS - 1); 597 break; 598 } 599 600 len -= FL_PKTSHIFT; 601 m0->m_len -= FL_PKTSHIFT; 602 m0->m_data += FL_PKTSHIFT; 603 604 m0->m_pkthdr.len = len; 605 m0->m_pkthdr.rcvif = ifp; 606 m0->m_flags |= M_FLOWID; 607 m0->m_pkthdr.flowid = rss->hash_val; 608 609 if (cpl->csum_calc && !cpl->err_vec && 610 ifp->if_capenable & IFCAP_RXCSUM) { 611 m0->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | 612 CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 613 if (cpl->ip_frag) 614 m0->m_pkthdr.csum_data = be16toh(cpl->csum); 615 else 616 m0->m_pkthdr.csum_data = 0xffff; 617 rxq->rxcsum++; 618 } 619 620 if (cpl->vlan_ex) { 621 m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan); 622 m0->m_flags |= M_VLANTAG; 623 rxq->vlan_extraction++; 624 } 625 626 len -= m0->m_len; 627 m = m0; 628 while (len) { 629 m->m_next = get_fl_sdesc_data(fl, len, 0); 630 if (m->m_next == NULL) 631 CXGBE_UNIMPLEMENTED("mbuf recovery"); 632 633 m = m->m_next; 634 len -= m->m_len; 635 } 636#ifdef INET 637 if (cpl->l2info & htobe32(F_RXF_LRO) && 638 rxq->flags & RXQ_LRO_ENABLED && 639 tcp_lro_rx(lro, m0, 0) == 0) { 640 /* queued for LRO */ 641 } else 642#endif 643 (*ifp->if_input)(ifp, m0); 644 645 FL_LOCK(fl); 646 if (fl->needed >= 32) { 647 refill_fl(fl, 64); 648 if (fl->pending >= 32) 649 ring_fl_db(sc, fl); 650 } 651 FL_UNLOCK(fl); 652 653 ndescs++; 654 iq_next(iq); 655 656 if (ndescs > 32) { 657 t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), 658 V_CIDXINC(ndescs) | 659 V_INGRESSQID((u32)iq->cntxt_id) | 660 V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); 661 ndescs = 0; 662 } 663 } 664 665#ifdef INET 666 while (!SLIST_EMPTY(&lro->lro_active)) { 667 l = SLIST_FIRST(&lro->lro_active); 668 SLIST_REMOVE_HEAD(&lro->lro_active, next); 669 tcp_lro_flush(lro, l); 670 } 671#endif 672 673 t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) | 674 V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_next)); 675 676 IQ_UNLOCK(iq); 677 678 FL_LOCK(fl); 679 if (fl->needed) { 680 refill_fl(fl, -1); 681 if (fl->pending >= 8) 682 ring_fl_db(sc, fl); 683 } 684 FL_UNLOCK(fl); 685} 686 687/* Per-packet header in a coalesced tx WR, before the SGL starts (in flits) */ 688#define TXPKTS_PKT_HDR ((\ 689 sizeof(struct ulp_txpkt) + \ 690 sizeof(struct ulptx_idata) + \ 691 sizeof(struct cpl_tx_pkt_core) \ 692 ) / 8) 693 694/* Header of a coalesced tx WR, before SGL of first packet (in flits) */ 695#define TXPKTS_WR_HDR (\ 696 sizeof(struct fw_eth_tx_pkts_wr) / 8 + \ 697 TXPKTS_PKT_HDR) 698 699/* Header of a tx WR, before SGL of first packet (in flits) */ 700#define TXPKT_WR_HDR ((\ 701 sizeof(struct fw_eth_tx_pkt_wr) + \ 702 sizeof(struct cpl_tx_pkt_core) \ 703 ) / 8 ) 704 705/* Header of a tx LSO WR, before SGL of first packet (in flits) */ 706#define TXPKT_LSO_WR_HDR ((\ 707 sizeof(struct fw_eth_tx_pkt_wr) + \ 708 sizeof(struct cpl_tx_pkt_lso) + \ 709 sizeof(struct cpl_tx_pkt_core) \ 710 ) / 8 ) 711 712int 713t4_eth_tx(struct ifnet *ifp, struct sge_txq *txq, struct mbuf *m) 714{ 715 struct port_info *pi = (void 
*)ifp->if_softc; 716 struct adapter *sc = pi->adapter; 717 struct sge_eq *eq = &txq->eq; 718 struct buf_ring *br = eq->br; 719 struct mbuf *next; 720 int rc, coalescing; 721 struct txpkts txpkts; 722 struct sgl sgl; 723 724 TXQ_LOCK_ASSERT_OWNED(txq); 725 KASSERT(m, ("%s: called with nothing to do.", __func__)); 726 727 txpkts.npkt = 0;/* indicates there's nothing in txpkts */ 728 coalescing = 0; 729 730 prefetch(&eq->sdesc[eq->pidx]); 731 prefetch(&eq->desc[eq->pidx]); 732 prefetch(&eq->maps[eq->map_pidx]); 733 734 if (eq->avail < 8) 735 reclaim_tx_descs(eq, 1, 8); 736 737 for (; m; m = next ? next : drbr_dequeue(ifp, br)) { 738 739 if (eq->avail < 8) 740 break; 741 742 next = m->m_nextpkt; 743 m->m_nextpkt = NULL; 744 745 if (next || buf_ring_peek(br)) 746 coalescing = 1; 747 748 rc = get_pkt_sgl(txq, &m, &sgl, coalescing); 749 if (rc != 0) { 750 if (rc == ENOMEM) { 751 752 /* Short of resources, suspend tx */ 753 754 m->m_nextpkt = next; 755 break; 756 } 757 758 /* 759 * Unrecoverable error for this packet, throw it away 760 * and move on to the next. get_pkt_sgl may already 761 * have freed m (it will be NULL in that case and the 762 * m_freem here is still safe). 763 */ 764 765 m_freem(m); 766 continue; 767 } 768 769 if (coalescing && 770 add_to_txpkts(pi, txq, &txpkts, m, &sgl) == 0) { 771 772 /* Successfully absorbed into txpkts */ 773 774 write_ulp_cpl_sgl(pi, txq, &txpkts, m, &sgl); 775 goto doorbell; 776 } 777 778 /* 779 * We weren't coalescing to begin with, or current frame could 780 * not be coalesced (add_to_txpkts flushes txpkts if a frame 781 * given to it can't be coalesced). Either way there should be 782 * nothing in txpkts. 783 */ 784 KASSERT(txpkts.npkt == 0, 785 ("%s: txpkts not empty: %d", __func__, txpkts.npkt)); 786 787 /* We're sending out individual packets now */ 788 coalescing = 0; 789 790 if (eq->avail < 8) 791 reclaim_tx_descs(eq, 1, 8); 792 rc = write_txpkt_wr(pi, txq, m, &sgl); 793 if (rc != 0) { 794 795 /* Short of hardware descriptors, suspend tx */ 796 797 /* 798 * This is an unlikely but expensive failure. We've 799 * done all the hard work (DMA mappings etc.) and now we 800 * can't send out the packet. What's worse, we have to 801 * spend even more time freeing up everything in sgl. 802 */ 803 txq->no_desc++; 804 free_pkt_sgl(txq, &sgl); 805 806 m->m_nextpkt = next; 807 break; 808 } 809 810 ETHER_BPF_MTAP(ifp, m); 811 if (sgl.nsegs == 0) 812 m_freem(m); 813 814doorbell: 815 /* Fewer and fewer doorbells as the queue fills up */ 816 if (eq->pending >= (1 << (fls(eq->qsize - eq->avail) / 2))) 817 ring_tx_db(sc, eq); 818 reclaim_tx_descs(eq, 16, 32); 819 } 820 821 if (txpkts.npkt > 0) 822 write_txpkts_wr(txq, &txpkts); 823 824 /* 825 * m not NULL means there was an error but we haven't thrown it away. 826 * This can happen when we're short of tx descriptors (no_desc) or maybe 827 * even DMA maps (no_dmamap). Either way, a credit flush and reclaim 828 * will get things going again. 829 * 830 * If eq->avail is already 0 we know a credit flush was requested in the 831 * WR that reduced it to 0 so we don't need another flush (we don't have 832 * any descriptor for a flush WR anyway, duh). 
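 *
 * The leftover frame is parked in txq->m below rather than freed, so it
 * can be retried once the flush/reclaim brings credits back.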
833 */ 834 if (m && eq->avail > 0) 835 write_eqflush_wr(eq); 836 txq->m = m; 837 838 if (eq->pending) 839 ring_tx_db(sc, eq); 840 841 reclaim_tx_descs(eq, 16, eq->qsize); 842 843 return (0); 844} 845 846void 847t4_update_fl_bufsize(struct ifnet *ifp) 848{ 849 struct port_info *pi = ifp->if_softc; 850 struct sge_rxq *rxq; 851 struct sge_fl *fl; 852 int i; 853 854 for_each_rxq(pi, i, rxq) { 855 fl = &rxq->fl; 856 857 FL_LOCK(fl); 858 set_fl_tag_idx(fl, ifp->if_mtu); 859 FL_UNLOCK(fl); 860 } 861} 862 863/* 864 * A non-NULL handler indicates this iq will not receive direct interrupts, the 865 * handler will be invoked by a forwarded interrupt queue. 866 */ 867static inline void 868init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx, 869 int qsize, int esize, iq_intr_handler_t *handler, char *name) 870{ 871 KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS, 872 ("%s: bad tmr_idx %d", __func__, tmr_idx)); 873 KASSERT(pktc_idx < SGE_NCOUNTERS, /* -ve is ok, means don't use */ 874 ("%s: bad pktc_idx %d", __func__, pktc_idx)); 875 876 iq->flags = 0; 877 iq->adapter = sc; 878 iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx) | 879 V_QINTR_CNT_EN(pktc_idx >= 0); 880 iq->intr_pktc_idx = pktc_idx; 881 iq->qsize = roundup(qsize, 16); /* See FW_IQ_CMD/iqsize */ 882 iq->esize = max(esize, 16); /* See FW_IQ_CMD/iqesize */ 883 iq->handler = handler; 884 strlcpy(iq->lockname, name, sizeof(iq->lockname)); 885} 886 887static inline void 888init_fl(struct sge_fl *fl, int qsize, char *name) 889{ 890 fl->qsize = qsize; 891 strlcpy(fl->lockname, name, sizeof(fl->lockname)); 892} 893 894static inline void 895init_txq(struct sge_txq *txq, int qsize, char *name) 896{ 897 txq->eq.qsize = qsize; 898 strlcpy(txq->eq.lockname, name, sizeof(txq->eq.lockname)); 899} 900 901static int 902alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag, 903 bus_dmamap_t *map, bus_addr_t *pa, void **va) 904{ 905 int rc; 906 907 rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR, 908 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag); 909 if (rc != 0) { 910 device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc); 911 goto done; 912 } 913 914 rc = bus_dmamem_alloc(*tag, va, 915 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map); 916 if (rc != 0) { 917 device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc); 918 goto done; 919 } 920 921 rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0); 922 if (rc != 0) { 923 device_printf(sc->dev, "cannot load DMA map: %d\n", rc); 924 goto done; 925 } 926done: 927 if (rc) 928 free_ring(sc, *tag, *map, *pa, *va); 929 930 return (rc); 931} 932 933static int 934free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map, 935 bus_addr_t pa, void *va) 936{ 937 if (pa) 938 bus_dmamap_unload(tag, map); 939 if (va) 940 bus_dmamem_free(tag, va, map); 941 if (tag) 942 bus_dma_tag_destroy(tag); 943 944 return (0); 945} 946 947/* 948 * Allocates the ring for an ingress queue and an optional freelist. If the 949 * freelist is specified it will be allocated and then associated with the 950 * ingress queue. 951 * 952 * Returns errno on failure. Resources allocated up to that point may still be 953 * allocated. Caller is responsible for cleanup in case this function fails. 954 * 955 * If the ingress queue will take interrupts directly (iq->handler == NULL) then 956 * the intr_idx specifies the vector, starting from 0. Otherwise it specifies 957 * the index of the queue to which its interrupts will be forwarded. 
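 *
 * (See t4_setup_eth_queues: with INTR_FWD set an rxq's interrupts go to
 * fiq[(pi->first_rxq + i) % NFIQ(sc)], otherwise the rxq takes vector
 * pi->first_rxq + i + 2 directly.)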
958 */ 959static int 960alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl, 961 int intr_idx) 962{ 963 int rc, i, cntxt_id; 964 size_t len; 965 struct fw_iq_cmd c; 966 struct adapter *sc = iq->adapter; 967 __be32 v = 0; 968 969 /* The adapter queues are nominally allocated in port[0]'s name */ 970 if (pi == NULL) 971 pi = sc->port[0]; 972 973 mtx_init(&iq->iq_lock, iq->lockname, NULL, MTX_DEF); 974 975 len = iq->qsize * iq->esize; 976 rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba, 977 (void **)&iq->desc); 978 if (rc != 0) 979 return (rc); 980 981 bzero(&c, sizeof(c)); 982 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST | 983 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) | 984 V_FW_IQ_CMD_VFN(0)); 985 986 c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART | 987 FW_LEN16(c)); 988 989 /* Special handling for firmware event queue */ 990 if (iq == &sc->sge.fwq) 991 v |= F_FW_IQ_CMD_IQASYNCH; 992 993 if (iq->handler) { 994 KASSERT(intr_idx < NFIQ(sc), 995 ("%s: invalid indirect intr_idx %d", __func__, intr_idx)); 996 v |= F_FW_IQ_CMD_IQANDST; 997 v |= V_FW_IQ_CMD_IQANDSTINDEX(sc->sge.fiq[intr_idx].abs_id); 998 } else { 999 KASSERT(intr_idx < sc->intr_count, 1000 ("%s: invalid direct intr_idx %d", __func__, intr_idx)); 1001 v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx); 1002 } 1003 1004 c.type_to_iqandstindex = htobe32(v | 1005 V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | 1006 V_FW_IQ_CMD_VIID(pi->viid) | 1007 V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT)); 1008 c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) | 1009 F_FW_IQ_CMD_IQGTSMODE | 1010 V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) | 1011 V_FW_IQ_CMD_IQESIZE(ilog2(iq->esize) - 4)); 1012 c.iqsize = htobe16(iq->qsize); 1013 c.iqaddr = htobe64(iq->ba); 1014 1015 if (fl) { 1016 mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF); 1017 1018 for (i = 0; i < FL_BUF_SIZES; i++) { 1019 1020 /* 1021 * A freelist buffer must be 16 byte aligned as the SGE 1022 * uses the low 4 bits of the bus addr to figure out the 1023 * buffer size. 1024 */ 1025 rc = bus_dma_tag_create(sc->dmat, 16, 0, 1026 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, 1027 FL_BUF_SIZE(i), 1, FL_BUF_SIZE(i), BUS_DMA_ALLOCNOW, 1028 NULL, NULL, &fl->tag[i]); 1029 if (rc != 0) { 1030 device_printf(sc->dev, 1031 "failed to create fl DMA tag[%d]: %d\n", 1032 i, rc); 1033 return (rc); 1034 } 1035 } 1036 len = fl->qsize * RX_FL_ESIZE; 1037 rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map, 1038 &fl->ba, (void **)&fl->desc); 1039 if (rc) 1040 return (rc); 1041 1042 /* Allocate space for one software descriptor per buffer. 
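 * Each hardware descriptor in the free list ring holds 8 buffer
 * addresses (ring_fl_db converts pending buffers to descriptors with a
 * divide by 8) and the status page occupies SPG_LEN / RX_FL_ESIZE
 * descriptors at the end of the ring, hence the expression for fl->cap.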
*/ 1043 fl->cap = (fl->qsize - SPG_LEN / RX_FL_ESIZE) * 8; 1044 FL_LOCK(fl); 1045 set_fl_tag_idx(fl, pi->ifp->if_mtu); 1046 rc = alloc_fl_sdesc(fl); 1047 FL_UNLOCK(fl); 1048 if (rc != 0) { 1049 device_printf(sc->dev, 1050 "failed to setup fl software descriptors: %d\n", 1051 rc); 1052 return (rc); 1053 } 1054 fl->needed = fl->cap - 1; /* one less to avoid cidx = pidx */ 1055 1056 c.iqns_to_fl0congen = 1057 htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE)); 1058 c.fl0dcaen_to_fl0cidxfthresh = 1059 htobe16(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_64B) | 1060 V_FW_IQ_CMD_FL0FBMAX(X_FETCHBURSTMAX_512B)); 1061 c.fl0size = htobe16(fl->qsize); 1062 c.fl0addr = htobe64(fl->ba); 1063 } 1064 1065 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 1066 if (rc != 0) { 1067 device_printf(sc->dev, 1068 "failed to create ingress queue: %d\n", rc); 1069 return (rc); 1070 } 1071 1072 iq->cdesc = iq->desc; 1073 iq->cidx = 0; 1074 iq->gen = 1; 1075 iq->intr_next = iq->intr_params; 1076 iq->cntxt_id = be16toh(c.iqid); 1077 iq->abs_id = be16toh(c.physiqid); 1078 iq->flags |= (IQ_ALLOCATED | IQ_STARTED); 1079 1080 cntxt_id = iq->cntxt_id - sc->sge.iq_start; 1081 KASSERT(cntxt_id < sc->sge.niq, 1082 ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__, 1083 cntxt_id, sc->sge.niq - 1)); 1084 sc->sge.iqmap[cntxt_id] = iq; 1085 1086 if (fl) { 1087 fl->cntxt_id = be16toh(c.fl0id); 1088 fl->pidx = fl->cidx = 0; 1089 1090 cntxt_id = iq->cntxt_id - sc->sge.eq_start; 1091 KASSERT(cntxt_id < sc->sge.neq, 1092 ("%s: fl->cntxt_id (%d) more than the max (%d)", __func__, 1093 cntxt_id, sc->sge.neq - 1)); 1094 sc->sge.eqmap[cntxt_id] = (void *)fl; 1095 1096 FL_LOCK(fl); 1097 refill_fl(fl, -1); 1098 if (fl->pending >= 8) 1099 ring_fl_db(sc, fl); 1100 FL_UNLOCK(fl); 1101 } 1102 1103 /* Enable IQ interrupts */ 1104 t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_SEINTARM(iq->intr_params) | 1105 V_INGRESSQID(iq->cntxt_id)); 1106 1107 return (0); 1108} 1109 1110/* 1111 * This can be called with the iq/fl in any state - fully allocated and 1112 * functional, partially allocated, even all-zeroed out. 1113 */ 1114static int 1115free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl) 1116{ 1117 int i, rc; 1118 struct adapter *sc = iq->adapter; 1119 device_t dev; 1120 1121 if (sc == NULL) 1122 return (0); /* nothing to do */ 1123 1124 dev = pi ? pi->dev : sc->dev; 1125 1126 if (iq->flags & IQ_STARTED) { 1127 rc = -t4_iq_start_stop(sc, sc->mbox, 0, sc->pf, 0, 1128 iq->cntxt_id, fl ? fl->cntxt_id : 0xffff, 0xffff); 1129 if (rc != 0) { 1130 device_printf(dev, 1131 "failed to stop queue %p: %d\n", iq, rc); 1132 return (rc); 1133 } 1134 iq->flags &= ~IQ_STARTED; 1135 } 1136 1137 if (iq->flags & IQ_ALLOCATED) { 1138 1139 rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, 1140 FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id, 1141 fl ? 
fl->cntxt_id : 0xffff, 0xffff); 1142 if (rc != 0) { 1143 device_printf(dev, 1144 "failed to free queue %p: %d\n", iq, rc); 1145 return (rc); 1146 } 1147 iq->flags &= ~IQ_ALLOCATED; 1148 } 1149 1150 free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc); 1151 1152 if (mtx_initialized(&iq->iq_lock)) 1153 mtx_destroy(&iq->iq_lock); 1154 1155 bzero(iq, sizeof(*iq)); 1156 1157 if (fl) { 1158 free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba, 1159 fl->desc); 1160 1161 if (fl->sdesc) { 1162 FL_LOCK(fl); 1163 free_fl_sdesc(fl); 1164 FL_UNLOCK(fl); 1165 } 1166 1167 if (mtx_initialized(&fl->fl_lock)) 1168 mtx_destroy(&fl->fl_lock); 1169 1170 for (i = 0; i < FL_BUF_SIZES; i++) { 1171 if (fl->tag[i]) 1172 bus_dma_tag_destroy(fl->tag[i]); 1173 } 1174 1175 bzero(fl, sizeof(*fl)); 1176 } 1177 1178 return (0); 1179} 1180 1181static int 1182alloc_iq(struct sge_iq *iq, int intr_idx) 1183{ 1184 return alloc_iq_fl(NULL, iq, NULL, intr_idx); 1185} 1186 1187static int 1188free_iq(struct sge_iq *iq) 1189{ 1190 return free_iq_fl(NULL, iq, NULL); 1191} 1192 1193static int 1194alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx) 1195{ 1196 int rc; 1197 struct sysctl_oid *oid; 1198 struct sysctl_oid_list *children; 1199 char name[16]; 1200 1201 rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx); 1202 if (rc != 0) 1203 return (rc); 1204 1205#ifdef INET 1206 rc = tcp_lro_init(&rxq->lro); 1207 if (rc != 0) 1208 return (rc); 1209 rxq->lro.ifp = pi->ifp; /* also indicates LRO init'ed */ 1210 1211 if (pi->ifp->if_capenable & IFCAP_LRO) 1212 rxq->flags |= RXQ_LRO_ENABLED; 1213#endif 1214 rxq->port = pi; 1215 1216 children = SYSCTL_CHILDREN(pi->oid_rxq); 1217 1218 snprintf(name, sizeof(name), "%d", idx); 1219 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD, 1220 NULL, "rx queue"); 1221 children = SYSCTL_CHILDREN(oid); 1222 1223 SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD, 1224 &rxq->lro.lro_queued, 0, NULL); 1225 SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD, 1226 &rxq->lro.lro_flushed, 0, NULL); 1227 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD, 1228 &rxq->rxcsum, "# of times hardware assisted with checksum"); 1229 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_extraction", 1230 CTLFLAG_RD, &rxq->vlan_extraction, 1231 "# of times hardware extracted 802.1Q tag"); 1232 1233 return (rc); 1234} 1235 1236static int 1237free_rxq(struct port_info *pi, struct sge_rxq *rxq) 1238{ 1239 int rc; 1240 1241#ifdef INET 1242 if (rxq->lro.ifp) { 1243 tcp_lro_free(&rxq->lro); 1244 rxq->lro.ifp = NULL; 1245 } 1246#endif 1247 1248 rc = free_iq_fl(pi, &rxq->iq, &rxq->fl); 1249 if (rc == 0) 1250 bzero(rxq, sizeof(*rxq)); 1251 1252 return (rc); 1253} 1254 1255static int 1256alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx) 1257{ 1258 int rc, cntxt_id; 1259 size_t len; 1260 struct adapter *sc = pi->adapter; 1261 struct fw_eq_eth_cmd c; 1262 struct sge_eq *eq = &txq->eq; 1263 char name[16]; 1264 struct sysctl_oid *oid; 1265 struct sysctl_oid_list *children; 1266 1267 mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF); 1268 1269 len = eq->qsize * TX_EQ_ESIZE; 1270 rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map, 1271 &eq->ba, (void **)&eq->desc); 1272 if (rc) 1273 return (rc); 1274 1275 eq->cap = eq->qsize - SPG_LEN / TX_EQ_ESIZE; 1276 eq->spg = (void *)&eq->desc[eq->cap]; 1277 eq->avail = eq->cap - 1; /* one less to avoid cidx = pidx */ 1278 eq->sdesc = malloc(eq->cap * sizeof(struct tx_sdesc), M_CXGBE, 
1279 M_ZERO | M_WAITOK); 1280 eq->br = buf_ring_alloc(eq->qsize, M_CXGBE, M_WAITOK, &eq->eq_lock); 1281 1282 rc = bus_dma_tag_create(sc->dmat, 1, 0, BUS_SPACE_MAXADDR, 1283 BUS_SPACE_MAXADDR, NULL, NULL, 64 * 1024, TX_SGL_SEGS, 1284 BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &eq->tx_tag); 1285 if (rc != 0) { 1286 device_printf(sc->dev, 1287 "failed to create tx DMA tag: %d\n", rc); 1288 return (rc); 1289 } 1290 1291 rc = alloc_eq_maps(eq); 1292 if (rc != 0) { 1293 device_printf(sc->dev, "failed to setup tx DMA maps: %d\n", rc); 1294 return (rc); 1295 } 1296 1297 bzero(&c, sizeof(c)); 1298 1299 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST | 1300 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) | 1301 V_FW_EQ_ETH_CMD_VFN(0)); 1302 c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC | 1303 F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); 1304 c.viid_pkd = htobe32(V_FW_EQ_ETH_CMD_VIID(pi->viid)); 1305 c.fetchszm_to_iqid = 1306 htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | 1307 V_FW_EQ_ETH_CMD_PCIECHN(pi->tx_chan) | 1308 V_FW_EQ_ETH_CMD_IQID(sc->sge.rxq[pi->first_rxq].iq.cntxt_id)); 1309 c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) | 1310 V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 1311 V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) | 1312 V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize)); 1313 c.eqaddr = htobe64(eq->ba); 1314 1315 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 1316 if (rc != 0) { 1317 device_printf(pi->dev, 1318 "failed to create egress queue: %d\n", rc); 1319 return (rc); 1320 } 1321 1322 eq->pidx = eq->cidx = 0; 1323 eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd)); 1324 eq->flags |= (EQ_ALLOCATED | EQ_STARTED); 1325 1326 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 1327 KASSERT(cntxt_id < sc->sge.neq, 1328 ("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 1329 cntxt_id, sc->sge.neq - 1)); 1330 sc->sge.eqmap[cntxt_id] = eq; 1331 1332 children = SYSCTL_CHILDREN(pi->oid_txq); 1333 1334 snprintf(name, sizeof(name), "%d", idx); 1335 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD, 1336 NULL, "tx queue"); 1337 children = SYSCTL_CHILDREN(oid); 1338 1339 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD, 1340 &txq->txcsum, "# of times hardware assisted with checksum"); 1341 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_insertion", 1342 CTLFLAG_RD, &txq->vlan_insertion, 1343 "# of times hardware inserted 802.1Q tag"); 1344 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD, 1345 &txq->tso_wrs, "# of IPv4 TSO work requests"); 1346 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD, 1347 &txq->imm_wrs, "# of work requests with immediate data"); 1348 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD, 1349 &txq->sgl_wrs, "# of work requests with direct SGL"); 1350 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD, 1351 &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)"); 1352 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_wrs", CTLFLAG_RD, 1353 &txq->txpkts_wrs, "# of txpkts work requests (multiple pkts/WR)"); 1354 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_pkts", CTLFLAG_RD, 1355 &txq->txpkts_pkts, "# of frames tx'd using txpkts work requests"); 1356 1357 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_dmamap", CTLFLAG_RD, 1358 &txq->no_dmamap, 0, "# of times txq ran out of DMA maps"); 1359 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD, 
1360 &txq->no_desc, 0, "# of times txq ran out of hardware descriptors"); 1361 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "egr_update", CTLFLAG_RD, 1362 &txq->egr_update, 0, "egress update notifications from the SGE"); 1363 1364 return (rc); 1365} 1366 1367static int 1368free_txq(struct port_info *pi, struct sge_txq *txq) 1369{ 1370 int rc; 1371 struct adapter *sc = pi->adapter; 1372 struct sge_eq *eq = &txq->eq; 1373 1374 if (eq->flags & (EQ_ALLOCATED | EQ_STARTED)) { 1375 rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id); 1376 if (rc != 0) { 1377 device_printf(pi->dev, 1378 "failed to free egress queue %p: %d\n", eq, rc); 1379 return (rc); 1380 } 1381 eq->flags &= ~(EQ_ALLOCATED | EQ_STARTED); 1382 } 1383 1384 free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc); 1385 1386 free(eq->sdesc, M_CXGBE); 1387 1388 if (eq->maps) 1389 free_eq_maps(eq); 1390 1391 buf_ring_free(eq->br, M_CXGBE); 1392 1393 if (eq->tx_tag) 1394 bus_dma_tag_destroy(eq->tx_tag); 1395 1396 if (mtx_initialized(&eq->eq_lock)) 1397 mtx_destroy(&eq->eq_lock); 1398 1399 bzero(txq, sizeof(*txq)); 1400 return (0); 1401} 1402 1403static void 1404oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error) 1405{ 1406 bus_addr_t *ba = arg; 1407 1408 KASSERT(nseg == 1, 1409 ("%s meant for single segment mappings only.", __func__)); 1410 1411 *ba = error ? 0 : segs->ds_addr; 1412} 1413 1414static inline bool 1415is_new_response(const struct sge_iq *iq, struct rsp_ctrl **ctrl) 1416{ 1417 *ctrl = (void *)((uintptr_t)iq->cdesc + 1418 (iq->esize - sizeof(struct rsp_ctrl))); 1419 1420 return (((*ctrl)->u.type_gen >> S_RSPD_GEN) == iq->gen); 1421} 1422 1423static inline void 1424iq_next(struct sge_iq *iq) 1425{ 1426 iq->cdesc = (void *) ((uintptr_t)iq->cdesc + iq->esize); 1427 if (__predict_false(++iq->cidx == iq->qsize - 1)) { 1428 iq->cidx = 0; 1429 iq->gen ^= 1; 1430 iq->cdesc = iq->desc; 1431 } 1432} 1433 1434static inline void 1435ring_fl_db(struct adapter *sc, struct sge_fl *fl) 1436{ 1437 int ndesc = fl->pending / 8; 1438 1439 /* Caller responsible for ensuring there's something useful to do */ 1440 KASSERT(ndesc > 0, ("%s called with no useful work to do.", __func__)); 1441 1442 wmb(); 1443 1444 t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), F_DBPRIO | 1445 V_QID(fl->cntxt_id) | V_PIDX(ndesc)); 1446 1447 fl->pending &= 7; 1448} 1449 1450static void 1451refill_fl(struct sge_fl *fl, int nbufs) 1452{ 1453 __be64 *d = &fl->desc[fl->pidx]; 1454 struct fl_sdesc *sd = &fl->sdesc[fl->pidx]; 1455 bus_dma_tag_t tag; 1456 bus_addr_t pa; 1457 caddr_t cl; 1458 int rc; 1459 1460 FL_LOCK_ASSERT_OWNED(fl); 1461 1462 if (nbufs < 0 || nbufs > fl->needed) 1463 nbufs = fl->needed; 1464 1465 while (nbufs--) { 1466 1467 if (sd->cl != NULL) { 1468 1469 /* 1470 * This happens when a frame small enough to fit 1471 * entirely in an mbuf was received in cl last time. 1472 * We'd held on to cl and can reuse it now. Note that 1473 * we reuse a cluster of the old size if fl->tag_idx is 1474 * no longer the same as sd->tag_idx. 1475 */ 1476 1477 KASSERT(*d == sd->ba_tag, 1478 ("%s: recyling problem at pidx %d", 1479 __func__, fl->pidx)); 1480 1481 d++; 1482 goto recycled; 1483 } 1484 1485 1486 if (fl->tag_idx != sd->tag_idx) { 1487 bus_dmamap_t map; 1488 bus_dma_tag_t newtag = fl->tag[fl->tag_idx]; 1489 bus_dma_tag_t oldtag = fl->tag[sd->tag_idx]; 1490 1491 /* 1492 * An MTU change can get us here. Discard the old map 1493 * which was created with the old tag, but only if 1494 * we're able to get a new one. 
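 * If the new map cannot be created we keep the old map and tag_idx and
 * simply refill this slot with a buffer of the old size.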
1495 */ 1496 rc = bus_dmamap_create(newtag, 0, &map); 1497 if (rc == 0) { 1498 bus_dmamap_destroy(oldtag, sd->map); 1499 sd->map = map; 1500 sd->tag_idx = fl->tag_idx; 1501 } 1502 } 1503 1504 tag = fl->tag[sd->tag_idx]; 1505 1506 cl = m_cljget(NULL, M_NOWAIT, FL_BUF_SIZE(sd->tag_idx)); 1507 if (cl == NULL) 1508 break; 1509 1510 rc = bus_dmamap_load(tag, sd->map, cl, 1511 FL_BUF_SIZE(sd->tag_idx), oneseg_dma_callback, 1512 &pa, 0); 1513 if (rc != 0 || pa == 0) { 1514 fl->dmamap_failed++; 1515 uma_zfree(FL_BUF_ZONE(sd->tag_idx), cl); 1516 break; 1517 } 1518 1519 sd->cl = cl; 1520 *d++ = htobe64(pa | sd->tag_idx); 1521 1522#ifdef INVARIANTS 1523 sd->ba_tag = htobe64(pa | sd->tag_idx); 1524#endif 1525 1526recycled: fl->pending++; 1527 fl->needed--; 1528 sd++; 1529 if (++fl->pidx == fl->cap) { 1530 fl->pidx = 0; 1531 sd = fl->sdesc; 1532 d = fl->desc; 1533 } 1534 1535 /* No harm if gethdr fails, we'll retry after rx */ 1536 if (sd->m == NULL) 1537 sd->m = m_gethdr(M_NOWAIT, MT_NOINIT); 1538 } 1539} 1540 1541static int 1542alloc_fl_sdesc(struct sge_fl *fl) 1543{ 1544 struct fl_sdesc *sd; 1545 bus_dma_tag_t tag; 1546 int i, rc; 1547 1548 FL_LOCK_ASSERT_OWNED(fl); 1549 1550 fl->sdesc = malloc(fl->cap * sizeof(struct fl_sdesc), M_CXGBE, 1551 M_ZERO | M_WAITOK); 1552 1553 tag = fl->tag[fl->tag_idx]; 1554 sd = fl->sdesc; 1555 for (i = 0; i < fl->cap; i++, sd++) { 1556 1557 sd->tag_idx = fl->tag_idx; 1558 rc = bus_dmamap_create(tag, 0, &sd->map); 1559 if (rc != 0) 1560 goto failed; 1561 1562 /* Doesn't matter if this succeeds or not */ 1563 sd->m = m_gethdr(M_NOWAIT, MT_NOINIT); 1564 } 1565 1566 return (0); 1567failed: 1568 while (--i >= 0) { 1569 sd--; 1570 bus_dmamap_destroy(tag, sd->map); 1571 if (sd->m) { 1572 m_init(sd->m, zone_mbuf, MLEN, M_NOWAIT, MT_DATA, 0); 1573 m_free(sd->m); 1574 sd->m = NULL; 1575 } 1576 } 1577 KASSERT(sd == fl->sdesc, ("%s: EDOOFUS", __func__)); 1578 1579 free(fl->sdesc, M_CXGBE); 1580 fl->sdesc = NULL; 1581 1582 return (rc); 1583} 1584 1585static void 1586free_fl_sdesc(struct sge_fl *fl) 1587{ 1588 struct fl_sdesc *sd; 1589 int i; 1590 1591 FL_LOCK_ASSERT_OWNED(fl); 1592 1593 sd = fl->sdesc; 1594 for (i = 0; i < fl->cap; i++, sd++) { 1595 1596 if (sd->m) { 1597 m_init(sd->m, zone_mbuf, MLEN, M_NOWAIT, MT_DATA, 0); 1598 m_free(sd->m); 1599 sd->m = NULL; 1600 } 1601 1602 if (sd->cl) { 1603 bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map); 1604 uma_zfree(FL_BUF_ZONE(sd->tag_idx), sd->cl); 1605 sd->cl = NULL; 1606 } 1607 1608 bus_dmamap_destroy(fl->tag[sd->tag_idx], sd->map); 1609 } 1610 1611 free(fl->sdesc, M_CXGBE); 1612 fl->sdesc = NULL; 1613} 1614 1615static int 1616alloc_eq_maps(struct sge_eq *eq) 1617{ 1618 struct tx_map *txm; 1619 int i, rc, count; 1620 1621 /* 1622 * We can stuff ~10 frames in an 8-descriptor txpkts WR (8 is the SGE 1623 * limit for any WR). txq->no_dmamap events shouldn't occur if maps is 1624 * sized for the worst case. 
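 * e.g. a txq with qsize 1024 gets 1024 * 10 / 8 = 1280 DMA maps.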
1625 */ 1626 count = eq->qsize * 10 / 8; 1627 eq->map_total = eq->map_avail = count; 1628 eq->map_cidx = eq->map_pidx = 0; 1629 1630 eq->maps = malloc(count * sizeof(struct tx_map), M_CXGBE, 1631 M_ZERO | M_WAITOK); 1632 1633 txm = eq->maps; 1634 for (i = 0; i < count; i++, txm++) { 1635 rc = bus_dmamap_create(eq->tx_tag, 0, &txm->map); 1636 if (rc != 0) 1637 goto failed; 1638 } 1639 1640 return (0); 1641failed: 1642 while (--i >= 0) { 1643 txm--; 1644 bus_dmamap_destroy(eq->tx_tag, txm->map); 1645 } 1646 KASSERT(txm == eq->maps, ("%s: EDOOFUS", __func__)); 1647 1648 free(eq->maps, M_CXGBE); 1649 eq->maps = NULL; 1650 1651 return (rc); 1652} 1653 1654static void 1655free_eq_maps(struct sge_eq *eq) 1656{ 1657 struct tx_map *txm; 1658 int i; 1659 1660 txm = eq->maps; 1661 for (i = 0; i < eq->map_total; i++, txm++) { 1662 1663 if (txm->m) { 1664 bus_dmamap_unload(eq->tx_tag, txm->map); 1665 m_freem(txm->m); 1666 txm->m = NULL; 1667 } 1668 1669 bus_dmamap_destroy(eq->tx_tag, txm->map); 1670 } 1671 1672 free(eq->maps, M_CXGBE); 1673 eq->maps = NULL; 1674} 1675 1676/* 1677 * We'll do immediate data tx for non-TSO, but only when not coalescing. We're 1678 * willing to use upto 2 hardware descriptors which means a maximum of 96 bytes 1679 * of immediate data. 1680 */ 1681#define IMM_LEN ( \ 1682 2 * TX_EQ_ESIZE \ 1683 - sizeof(struct fw_eth_tx_pkt_wr) \ 1684 - sizeof(struct cpl_tx_pkt_core)) 1685 1686/* 1687 * Returns non-zero on failure, no need to cleanup anything in that case. 1688 * 1689 * Note 1: We always try to defrag the mbuf if required and return EFBIG only 1690 * if the resulting chain still won't fit in a tx descriptor. 1691 * 1692 * Note 2: We'll pullup the mbuf chain if TSO is requested and the first mbuf 1693 * does not have the TCP header in it. 1694 */ 1695static int 1696get_pkt_sgl(struct sge_txq *txq, struct mbuf **fp, struct sgl *sgl, 1697 int sgl_only) 1698{ 1699 struct mbuf *m = *fp; 1700 struct sge_eq *eq = &txq->eq; 1701 struct tx_map *txm; 1702 int rc, defragged = 0, n; 1703 1704 TXQ_LOCK_ASSERT_OWNED(txq); 1705 1706 if (m->m_pkthdr.tso_segsz) 1707 sgl_only = 1; /* Do not allow immediate data with LSO */ 1708 1709start: sgl->nsegs = 0; 1710 1711 if (m->m_pkthdr.len <= IMM_LEN && !sgl_only) 1712 return (0); /* nsegs = 0 tells caller to use imm. tx */ 1713 1714 if (eq->map_avail == 0) { 1715 txq->no_dmamap++; 1716 return (ENOMEM); 1717 } 1718 txm = &eq->maps[eq->map_pidx]; 1719 1720 if (m->m_pkthdr.tso_segsz && m->m_len < 50) { 1721 *fp = m_pullup(m, 50); 1722 m = *fp; 1723 if (m == NULL) 1724 return (ENOBUFS); 1725 } 1726 1727 rc = bus_dmamap_load_mbuf_sg(eq->tx_tag, txm->map, m, sgl->seg, 1728 &sgl->nsegs, BUS_DMA_NOWAIT); 1729 if (rc == EFBIG && defragged == 0) { 1730 m = m_defrag(m, M_DONTWAIT); 1731 if (m == NULL) 1732 return (EFBIG); 1733 1734 defragged = 1; 1735 *fp = m; 1736 goto start; 1737 } 1738 if (rc != 0) 1739 return (rc); 1740 1741 txm->m = m; 1742 eq->map_avail--; 1743 if (++eq->map_pidx == eq->map_total) 1744 eq->map_pidx = 0; 1745 1746 KASSERT(sgl->nsegs > 0 && sgl->nsegs <= TX_SGL_SEGS, 1747 ("%s: bad DMA mapping (%d segments)", __func__, sgl->nsegs)); 1748 1749 /* 1750 * Store the # of flits required to hold this frame's SGL in nflits. An 1751 * SGL has a (ULPTX header + len0, addr0) tuple optionally followed by 1752 * multiple (len0 + len1, addr0, addr1) tuples. If addr1 is not used 1753 * then len1 must be set to 0. 
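 * The leading tuple takes 2 flits and every subsequent pair of segments
 * takes 3 (a trailing odd segment takes 2), which is what the formula
 * below computes: nsegs = 1 -> 2 flits, 2 -> 4, 3 -> 5, 4 -> 7.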
1754 */ 1755 n = sgl->nsegs - 1; 1756 sgl->nflits = (3 * n) / 2 + (n & 1) + 2; 1757 1758 return (0); 1759} 1760 1761 1762/* 1763 * Releases all the txq resources used up in the specified sgl. 1764 */ 1765static int 1766free_pkt_sgl(struct sge_txq *txq, struct sgl *sgl) 1767{ 1768 struct sge_eq *eq = &txq->eq; 1769 struct tx_map *txm; 1770 1771 TXQ_LOCK_ASSERT_OWNED(txq); 1772 1773 if (sgl->nsegs == 0) 1774 return (0); /* didn't use any map */ 1775 1776 /* 1 pkt uses exactly 1 map, back it out */ 1777 1778 eq->map_avail++; 1779 if (eq->map_pidx > 0) 1780 eq->map_pidx--; 1781 else 1782 eq->map_pidx = eq->map_total - 1; 1783 1784 txm = &eq->maps[eq->map_pidx]; 1785 bus_dmamap_unload(eq->tx_tag, txm->map); 1786 txm->m = NULL; 1787 1788 return (0); 1789} 1790 1791static int 1792write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, struct mbuf *m, 1793 struct sgl *sgl) 1794{ 1795 struct sge_eq *eq = &txq->eq; 1796 struct fw_eth_tx_pkt_wr *wr; 1797 struct cpl_tx_pkt_core *cpl; 1798 uint32_t ctrl; /* used in many unrelated places */ 1799 uint64_t ctrl1; 1800 int nflits, ndesc; 1801 struct tx_sdesc *txsd; 1802 caddr_t dst; 1803 1804 TXQ_LOCK_ASSERT_OWNED(txq); 1805 1806 /* 1807 * Do we have enough flits to send this frame out? 1808 */ 1809 ctrl = sizeof(struct cpl_tx_pkt_core); 1810 if (m->m_pkthdr.tso_segsz) { 1811 nflits = TXPKT_LSO_WR_HDR; 1812 ctrl += sizeof(struct cpl_tx_pkt_lso); 1813 } else 1814 nflits = TXPKT_WR_HDR; 1815 if (sgl->nsegs > 0) 1816 nflits += sgl->nflits; 1817 else { 1818 nflits += howmany(m->m_pkthdr.len, 8); 1819 ctrl += m->m_pkthdr.len; 1820 } 1821 ndesc = howmany(nflits, 8); 1822 if (ndesc > eq->avail) 1823 return (ENOMEM); 1824 1825 /* Firmware work request header */ 1826 wr = (void *)&eq->desc[eq->pidx]; 1827 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | 1828 V_FW_WR_IMMDLEN(ctrl)); 1829 ctrl = V_FW_WR_LEN16(howmany(nflits, 2)); 1830 if (eq->avail == ndesc) 1831 ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ; 1832 wr->equiq_to_len16 = htobe32(ctrl); 1833 wr->r3 = 0; 1834 1835 if (m->m_pkthdr.tso_segsz) { 1836 struct cpl_tx_pkt_lso *lso = (void *)(wr + 1); 1837 struct ether_header *eh; 1838 struct ip *ip; 1839 struct tcphdr *tcp; 1840 1841 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE | 1842 F_LSO_LAST_SLICE; 1843 1844 eh = mtod(m, struct ether_header *); 1845 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 1846 ctrl |= V_LSO_ETHHDR_LEN(1); 1847 ip = (void *)((struct ether_vlan_header *)eh + 1); 1848 } else 1849 ip = (void *)(eh + 1); 1850 1851 tcp = (void *)((uintptr_t)ip + ip->ip_hl * 4); 1852 ctrl |= V_LSO_IPHDR_LEN(ip->ip_hl) | 1853 V_LSO_TCPHDR_LEN(tcp->th_off); 1854 1855 lso->lso_ctrl = htobe32(ctrl); 1856 lso->ipid_ofst = htobe16(0); 1857 lso->mss = htobe16(m->m_pkthdr.tso_segsz); 1858 lso->seqno_offset = htobe32(0); 1859 lso->len = htobe32(m->m_pkthdr.len); 1860 1861 cpl = (void *)(lso + 1); 1862 1863 txq->tso_wrs++; 1864 } else 1865 cpl = (void *)(wr + 1); 1866 1867 /* Checksum offload */ 1868 ctrl1 = 0; 1869 if (!(m->m_pkthdr.csum_flags & CSUM_IP)) 1870 ctrl1 |= F_TXPKT_IPCSUM_DIS; 1871 if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))) 1872 ctrl1 |= F_TXPKT_L4CSUM_DIS; 1873 if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP)) 1874 txq->txcsum++; /* some hardware assistance provided */ 1875 1876 /* VLAN tag insertion */ 1877 if (m->m_flags & M_VLANTAG) { 1878 ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag); 1879 txq->vlan_insertion++; 1880 } 1881 1882 /* CPL header */ 1883 cpl->ctrl0 = 
htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | 1884 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf)); 1885 cpl->pack = 0; 1886 cpl->len = htobe16(m->m_pkthdr.len); 1887 cpl->ctrl1 = htobe64(ctrl1); 1888 1889 /* Software descriptor */ 1890 txsd = &eq->sdesc[eq->pidx]; 1891 txsd->desc_used = ndesc; 1892 1893 eq->pending += ndesc; 1894 eq->avail -= ndesc; 1895 eq->pidx += ndesc; 1896 if (eq->pidx >= eq->cap) 1897 eq->pidx -= eq->cap; 1898 1899 /* SGL */ 1900 dst = (void *)(cpl + 1); 1901 if (sgl->nsegs > 0) { 1902 txsd->map_used = 1; 1903 txq->sgl_wrs++; 1904 write_sgl_to_txd(eq, sgl, &dst); 1905 } else { 1906 txsd->map_used = 0; 1907 txq->imm_wrs++; 1908 for (; m; m = m->m_next) { 1909 copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len); 1910 } 1911 } 1912 1913 txq->txpkt_wrs++; 1914 return (0); 1915} 1916 1917/* 1918 * Returns 0 to indicate that m has been accepted into a coalesced tx work 1919 * request. It has either been folded into txpkts or txpkts was flushed and m 1920 * has started a new coalesced work request (as the first frame in a fresh 1921 * txpkts). 1922 * 1923 * Returns non-zero to indicate a failure - caller is responsible for 1924 * transmitting m, if there was anything in txpkts it has been flushed. 1925 */ 1926static int 1927add_to_txpkts(struct port_info *pi, struct sge_txq *txq, struct txpkts *txpkts, 1928 struct mbuf *m, struct sgl *sgl) 1929{ 1930 struct sge_eq *eq = &txq->eq; 1931 int can_coalesce; 1932 struct tx_sdesc *txsd; 1933 int flits; 1934 1935 TXQ_LOCK_ASSERT_OWNED(txq); 1936 1937 if (txpkts->npkt > 0) { 1938 flits = TXPKTS_PKT_HDR + sgl->nflits; 1939 can_coalesce = m->m_pkthdr.tso_segsz == 0 && 1940 txpkts->nflits + flits <= TX_WR_FLITS && 1941 txpkts->nflits + flits <= eq->avail * 8 && 1942 txpkts->plen + m->m_pkthdr.len < 65536; 1943 1944 if (can_coalesce) { 1945 txpkts->npkt++; 1946 txpkts->nflits += flits; 1947 txpkts->plen += m->m_pkthdr.len; 1948 1949 txsd = &eq->sdesc[eq->pidx]; 1950 txsd->map_used++; 1951 1952 return (0); 1953 } 1954 1955 /* 1956 * Couldn't coalesce m into txpkts. The first order of business 1957 * is to send txpkts on its way. Then we'll revisit m. 1958 */ 1959 write_txpkts_wr(txq, txpkts); 1960 } 1961 1962 /* 1963 * Check if we can start a new coalesced tx work request with m as 1964 * the first packet in it. 1965 */ 1966 1967 KASSERT(txpkts->npkt == 0, ("%s: txpkts not empty", __func__)); 1968 1969 flits = TXPKTS_WR_HDR + sgl->nflits; 1970 can_coalesce = m->m_pkthdr.tso_segsz == 0 && 1971 flits <= eq->avail * 8 && flits <= TX_WR_FLITS; 1972 1973 if (can_coalesce == 0) 1974 return (EINVAL); 1975 1976 /* 1977 * Start a fresh coalesced tx WR with m as the first frame in it. 1978 */ 1979 txpkts->npkt = 1; 1980 txpkts->nflits = flits; 1981 txpkts->flitp = &eq->desc[eq->pidx].flit[2]; 1982 txpkts->plen = m->m_pkthdr.len; 1983 1984 txsd = &eq->sdesc[eq->pidx]; 1985 txsd->map_used = 1; 1986 1987 return (0); 1988} 1989 1990/* 1991 * Note that write_txpkts_wr can never run out of hardware descriptors (but 1992 * write_txpkt_wr can). add_to_txpkts ensures that a frame is accepted for 1993 * coalescing only if sufficient hardware descriptors are available. 
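 * (add_to_txpkts admits a frame only if txpkts->nflits + flits fits in
 * eq->avail * 8, i.e. in the flits of the descriptors already known to
 * be free.)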
1994 */ 1995static void 1996write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts) 1997{ 1998 struct sge_eq *eq = &txq->eq; 1999 struct fw_eth_tx_pkts_wr *wr; 2000 struct tx_sdesc *txsd; 2001 uint32_t ctrl; 2002 int ndesc; 2003 2004 TXQ_LOCK_ASSERT_OWNED(txq); 2005 2006 ndesc = howmany(txpkts->nflits, 8); 2007 2008 wr = (void *)&eq->desc[eq->pidx]; 2009 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR) | 2010 V_FW_WR_IMMDLEN(0)); /* immdlen does not matter in this WR */ 2011 ctrl = V_FW_WR_LEN16(howmany(txpkts->nflits, 2)); 2012 if (eq->avail == ndesc) 2013 ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ; 2014 wr->equiq_to_len16 = htobe32(ctrl); 2015 wr->plen = htobe16(txpkts->plen); 2016 wr->npkt = txpkts->npkt; 2017 wr->r3 = wr->r4 = 0; 2018 2019 /* Everything else already written */ 2020 2021 txsd = &eq->sdesc[eq->pidx]; 2022 txsd->desc_used = ndesc; 2023 2024 KASSERT(eq->avail >= ndesc, ("%s: out ouf descriptors", __func__)); 2025 2026 eq->pending += ndesc; 2027 eq->avail -= ndesc; 2028 eq->pidx += ndesc; 2029 if (eq->pidx >= eq->cap) 2030 eq->pidx -= eq->cap; 2031 2032 txq->txpkts_pkts += txpkts->npkt; 2033 txq->txpkts_wrs++; 2034 txpkts->npkt = 0; /* emptied */ 2035} 2036 2037static inline void 2038write_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq, 2039 struct txpkts *txpkts, struct mbuf *m, struct sgl *sgl) 2040{ 2041 struct ulp_txpkt *ulpmc; 2042 struct ulptx_idata *ulpsc; 2043 struct cpl_tx_pkt_core *cpl; 2044 struct sge_eq *eq = &txq->eq; 2045 uintptr_t flitp, start, end; 2046 uint64_t ctrl; 2047 caddr_t dst; 2048 2049 KASSERT(txpkts->npkt > 0, ("%s: txpkts is empty", __func__)); 2050 2051 start = (uintptr_t)eq->desc; 2052 end = (uintptr_t)eq->spg; 2053 2054 /* Checksum offload */ 2055 ctrl = 0; 2056 if (!(m->m_pkthdr.csum_flags & CSUM_IP)) 2057 ctrl |= F_TXPKT_IPCSUM_DIS; 2058 if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))) 2059 ctrl |= F_TXPKT_L4CSUM_DIS; 2060 if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP)) 2061 txq->txcsum++; /* some hardware assistance provided */ 2062 2063 /* VLAN tag insertion */ 2064 if (m->m_flags & M_VLANTAG) { 2065 ctrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag); 2066 txq->vlan_insertion++; 2067 } 2068 2069 /* 2070 * The previous packet's SGL must have ended at a 16 byte boundary (this 2071 * is required by the firmware/hardware). It follows that flitp cannot 2072 * wrap around between the ULPTX master command and ULPTX subcommand (8 2073 * bytes each), and that it can not wrap around in the middle of the 2074 * cpl_tx_pkt_core either. 
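 * (cpl_tx_pkt_core is 16 bytes and flitp stays a multiple of 16 bytes
 * away from the status page, so the CPL cannot straddle the end of the
 * ring; the flitp == end checks below handle the exact-boundary case.)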
2075 */ 2076 flitp = (uintptr_t)txpkts->flitp; 2077 KASSERT((flitp & 0xf) == 0, 2078 ("%s: last SGL did not end at 16 byte boundary: %p", 2079 __func__, txpkts->flitp)); 2080 2081 /* ULP master command */ 2082 ulpmc = (void *)flitp; 2083 ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0)); 2084 ulpmc->len = htonl(howmany(sizeof(*ulpmc) + sizeof(*ulpsc) + 2085 sizeof(*cpl) + 8 * sgl->nflits, 16)); 2086 2087 /* ULP subcommand */ 2088 ulpsc = (void *)(ulpmc + 1); 2089 ulpsc->cmd_more = htobe32(V_ULPTX_CMD((u32)ULP_TX_SC_IMM) | 2090 F_ULP_TX_SC_MORE); 2091 ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core)); 2092 2093 flitp += sizeof(*ulpmc) + sizeof(*ulpsc); 2094 if (flitp == end) 2095 flitp = start; 2096 2097 /* CPL_TX_PKT */ 2098 cpl = (void *)flitp; 2099 cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | 2100 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf)); 2101 cpl->pack = 0; 2102 cpl->len = htobe16(m->m_pkthdr.len); 2103 cpl->ctrl1 = htobe64(ctrl); 2104 2105 flitp += sizeof(*cpl); 2106 if (flitp == end) 2107 flitp = start; 2108 2109 /* SGL for this frame */ 2110 dst = (caddr_t)flitp; 2111 txpkts->nflits += write_sgl_to_txd(eq, sgl, &dst); 2112 txpkts->flitp = (void *)dst; 2113 2114 KASSERT(((uintptr_t)dst & 0xf) == 0, 2115 ("%s: SGL ends at %p (not a 16 byte boundary)", __func__, dst)); 2116} 2117 2118/* 2119 * If the SGL ends on an address that is not 16 byte aligned, this function will 2120 * add a 0 filled flit at the end. It returns 1 in that case. 2121 */ 2122static int 2123write_sgl_to_txd(struct sge_eq *eq, struct sgl *sgl, caddr_t *to) 2124{ 2125 __be64 *flitp, *end; 2126 struct ulptx_sgl *usgl; 2127 bus_dma_segment_t *seg; 2128 int i, padded; 2129 2130 KASSERT(sgl->nsegs > 0 && sgl->nflits > 0, 2131 ("%s: bad SGL - nsegs=%d, nflits=%d", 2132 __func__, sgl->nsegs, sgl->nflits)); 2133 2134 KASSERT(((uintptr_t)(*to) & 0xf) == 0, 2135 ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to)); 2136 2137 flitp = (__be64 *)(*to); 2138 end = flitp + sgl->nflits; 2139 seg = &sgl->seg[0]; 2140 usgl = (void *)flitp; 2141 2142 /* 2143 * We start at a 16 byte boundary somewhere inside the tx descriptor 2144 * ring, so we're at least 16 bytes away from the status page. There is 2145 * no chance of a wrap around in the middle of usgl (which is 16 bytes). 
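 *
 * The DSGL itself is one flit holding cmd_nsge and len0, one flit holding
 * addr0, and then one group per pair of remaining segments: a flit packing
 * the pair's two lengths followed by one flit per address.  get_flit()
 * reproduces exactly this layout, flit by flit, on the wrap-around path
 * below.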
2146 */ 2147 2148 usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | 2149 V_ULPTX_NSGE(sgl->nsegs)); 2150 usgl->len0 = htobe32(seg->ds_len); 2151 usgl->addr0 = htobe64(seg->ds_addr); 2152 seg++; 2153 2154 if ((uintptr_t)end <= (uintptr_t)eq->spg) { 2155 2156 /* Won't wrap around at all */ 2157 2158 for (i = 0; i < sgl->nsegs - 1; i++, seg++) { 2159 usgl->sge[i / 2].len[i & 1] = htobe32(seg->ds_len); 2160 usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ds_addr); 2161 } 2162 if (i & 1) 2163 usgl->sge[i / 2].len[1] = htobe32(0); 2164 } else { 2165 2166 /* Will wrap somewhere in the rest of the SGL */ 2167 2168 /* 2 flits already written, write the rest flit by flit */ 2169 flitp = (void *)(usgl + 1); 2170 for (i = 0; i < sgl->nflits - 2; i++) { 2171 if ((uintptr_t)flitp == (uintptr_t)eq->spg) 2172 flitp = (void *)eq->desc; 2173 *flitp++ = get_flit(seg, sgl->nsegs - 1, i); 2174 } 2175 end = flitp; 2176 } 2177 2178 if ((uintptr_t)end & 0xf) { 2179 *(uint64_t *)end = 0; 2180 end++; 2181 padded = 1; 2182 } else 2183 padded = 0; 2184 2185 if ((uintptr_t)end == (uintptr_t)eq->spg) 2186 *to = (void *)eq->desc; 2187 else 2188 *to = (void *)end; 2189 2190 return (padded); 2191} 2192 2193static inline void 2194copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len) 2195{ 2196 if ((uintptr_t)(*to) + len <= (uintptr_t)eq->spg) { 2197 bcopy(from, *to, len); 2198 (*to) += len; 2199 } else { 2200 int portion = (uintptr_t)eq->spg - (uintptr_t)(*to); 2201 2202 bcopy(from, *to, portion); 2203 from += portion; 2204 portion = len - portion; /* remaining */ 2205 bcopy(from, (void *)eq->desc, portion); 2206 (*to) = (caddr_t)eq->desc + portion; 2207 } 2208} 2209 2210static inline void 2211ring_tx_db(struct adapter *sc, struct sge_eq *eq) 2212{ 2213 wmb(); 2214 t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), 2215 V_QID(eq->cntxt_id) | V_PIDX(eq->pending)); 2216 eq->pending = 0; 2217} 2218 2219static int 2220reclaim_tx_descs(struct sge_eq *eq, int atleast, int howmany) 2221{ 2222 struct tx_sdesc *txsd; 2223 struct tx_map *txm, *next_txm; 2224 unsigned int cidx, can_reclaim, reclaimed, maps, next_map_cidx; 2225 2226 EQ_LOCK_ASSERT_OWNED(eq); 2227 2228 cidx = eq->spg->cidx; /* stable snapshot */ 2229 cidx = be16_to_cpu(cidx); 2230 2231 if (cidx >= eq->cidx) 2232 can_reclaim = cidx - eq->cidx; 2233 else 2234 can_reclaim = cidx + eq->cap - eq->cidx; 2235 2236 if (can_reclaim < atleast) 2237 return (0); 2238 2239 next_map_cidx = eq->map_cidx; 2240 next_txm = txm = &eq->maps[next_map_cidx]; 2241 prefetch(txm); 2242 2243 maps = reclaimed = 0; 2244 do { 2245 int ndesc; 2246 2247 txsd = &eq->sdesc[eq->cidx]; 2248 ndesc = txsd->desc_used; 2249 2250 /* Firmware doesn't return "partial" credits. 
*/ 2251 KASSERT(can_reclaim >= ndesc, 2252 ("%s: unexpected number of credits: %d, %d", 2253 __func__, can_reclaim, ndesc)); 2254 2255 maps += txsd->map_used; 2256 reclaimed += ndesc; 2257 2258 eq->cidx += ndesc; 2259 if (eq->cidx >= eq->cap) 2260 eq->cidx -= eq->cap; 2261 2262 can_reclaim -= ndesc; 2263 2264 } while (can_reclaim && reclaimed < howmany); 2265 2266 eq->avail += reclaimed; 2267 KASSERT(eq->avail < eq->cap, /* avail tops out at (cap - 1) */ 2268 ("%s: too many descriptors available", __func__)); 2269 2270 eq->map_avail += maps; 2271 KASSERT(eq->map_avail <= eq->map_total, 2272 ("%s: too many maps available", __func__)); 2273 2274 prefetch(txm->m); 2275 while (maps--) { 2276 next_txm++; 2277 if (++next_map_cidx == eq->map_total) { 2278 next_map_cidx = 0; 2279 next_txm = eq->maps; 2280 } 2281 prefetch(next_txm->m); 2282 2283 bus_dmamap_unload(eq->tx_tag, txm->map); 2284 m_freem(txm->m); 2285 txm->m = NULL; 2286 2287 txm = next_txm; 2288 } 2289 eq->map_cidx = next_map_cidx; 2290 2291 return (reclaimed); 2292} 2293 2294static void 2295write_eqflush_wr(struct sge_eq *eq) 2296{ 2297 struct fw_eq_flush_wr *wr; 2298 struct tx_sdesc *txsd; 2299 2300 EQ_LOCK_ASSERT_OWNED(eq); 2301 KASSERT(eq->avail > 0, ("%s: no descriptors left.", __func__)); 2302 2303 wr = (void *)&eq->desc[eq->pidx]; 2304 bzero(wr, sizeof(*wr)); 2305 wr->opcode = FW_EQ_FLUSH_WR; 2306 wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(sizeof(*wr) / 16) | 2307 F_FW_WR_EQUEQ | F_FW_WR_EQUIQ); 2308 2309 txsd = &eq->sdesc[eq->pidx]; 2310 txsd->desc_used = 1; 2311 txsd->map_used = 0; 2312 2313 eq->pending++; 2314 eq->avail--; 2315 if (++eq->pidx == eq->cap) 2316 eq->pidx = 0; 2317} 2318 2319static __be64 2320get_flit(bus_dma_segment_t *sgl, int nsegs, int idx) 2321{ 2322 int i = (idx / 3) * 2; 2323 2324 switch (idx % 3) { 2325 case 0: { 2326 __be64 rc; 2327 2328 rc = htobe32(sgl[i].ds_len); 2329 if (i + 1 < nsegs) 2330 rc |= (uint64_t)htobe32(sgl[i + 1].ds_len) << 32; 2331 2332 return (rc); 2333 } 2334 case 1: 2335 return htobe64(sgl[i].ds_addr); 2336 case 2: 2337 return htobe64(sgl[i + 1].ds_addr); 2338 } 2339 2340 return (0); 2341} 2342 2343static struct mbuf * 2344get_fl_sdesc_data(struct sge_fl *fl, int len, int flags) 2345{ 2346 struct fl_sdesc *sd; 2347 struct mbuf *m; 2348 2349 sd = &fl->sdesc[fl->cidx]; 2350 FL_LOCK(fl); 2351 if (++fl->cidx == fl->cap) 2352 fl->cidx = 0; 2353 fl->needed++; 2354 FL_UNLOCK(fl); 2355 2356 m = sd->m; 2357 if (m == NULL) { 2358 m = m_gethdr(M_NOWAIT, MT_NOINIT); 2359 if (m == NULL) 2360 return (NULL); 2361 } 2362 sd->m = NULL; /* consumed */ 2363 2364 bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map, BUS_DMASYNC_POSTREAD); 2365 m_init(m, zone_mbuf, MLEN, M_NOWAIT, MT_DATA, flags); 2366 if ((flags && len < MINCLSIZE) || (!flags && len <= MLEN)) 2367 bcopy(sd->cl, mtod(m, caddr_t), len); 2368 else { 2369 bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map); 2370 m_cljset(m, sd->cl, FL_BUF_TYPE(sd->tag_idx)); 2371 sd->cl = NULL; /* consumed */ 2372 } 2373 2374 m->m_len = min(len, FL_BUF_SIZE(sd->tag_idx)); 2375 2376 return (m); 2377} 2378 2379static void 2380set_fl_tag_idx(struct sge_fl *fl, int mtu) 2381{ 2382 int i; 2383 2384 FL_LOCK_ASSERT_OWNED(fl); 2385 2386 for (i = 0; i < FL_BUF_SIZES - 1; i++) { 2387 if (FL_BUF_SIZE(i) >= (mtu + FL_PKTSHIFT)) 2388 break; 2389 } 2390 2391 fl->tag_idx = i; 2392} 2393
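/*
 * Illustrative sketch only (disabled): the helper below is hypothetical and
 * not part of the driver; it just spells out the flit accounting used by the
 * tx path above.  A hardware tx descriptor holds 8 flits (64 bytes) and
 * FW_WR_LEN16 counts 16 byte units (2 flits), so a work request carrying
 * nflits flits consumes howmany(nflits, 8) descriptors and advertises
 * howmany(nflits, 2) as its len16, which is what write_txpkts_wr programs.
 * For instance, nflits = 36 gives ndesc = 5 and len16 = 18.
 */
#if 0
static void
example_txpkts_cost(int nflits, int *ndesc, int *len16)
{

	*ndesc = howmany(nflits, 8);	/* 64-byte hardware descriptors */
	*len16 = howmany(nflits, 2);	/* 16-byte units for V_FW_WR_LEN16 */
}
#endif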