t4_sge.c revision 237819
/*-
 * Copyright (c) 2011 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/cxgbe/t4_sge.c 237819 2012-06-29 19:51:06Z np $");

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/types.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/kdb.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/sysctl.h>
#include <sys/smp.h>
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>

#include "common/common.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "common/t4_msg.h"

struct fl_buf_info {
	int size;
	int type;
	uma_zone_t zone;
};

/* Filled up by t4_sge_modload */
static struct fl_buf_info fl_buf_info[FL_BUF_SIZES];

#define FL_BUF_SIZE(x)	(fl_buf_info[x].size)
#define FL_BUF_TYPE(x)	(fl_buf_info[x].type)
#define FL_BUF_ZONE(x)	(fl_buf_info[x].zone)

enum {
	FL_PKTSHIFT = 2
};

static int fl_pad = CACHE_LINE_SIZE;
static int spg_len = 64;
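/*
 * fl_pad is the boundary that rx buffers posted to a freelist are padded
 * out to (it feeds the INGPADBOUNDARY field programmed in t4_sge_init) and
 * spg_len is the size of an egress queue's status page.  Both start out
 * with conservative defaults here and are refined in t4_sge_modload()
 * based on cpu_clflush_line_size on x86 systems.
 */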
/* Used to track coalesced tx work request */
struct txpkts {
	uint64_t *flitp;	/* ptr to flit where next pkt should start */
	uint8_t npkt;		/* # of packets in this work request */
	uint8_t nflits;		/* # of flits used by this work request */
	uint16_t plen;		/* total payload (sum of all packets) */
};

/* A packet's SGL.  This + m_pkthdr has all info needed for tx */
struct sgl {
	int nsegs;		/* # of segments in the SGL, 0 means imm. tx */
	int nflits;		/* # of flits needed for the SGL */
	bus_dma_segment_t seg[TX_SGL_SEGS];
};

static int service_iq(struct sge_iq *, int);
static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t,
    int *);
static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *);
static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int,
    int, char *);
static inline void init_fl(struct sge_fl *, int, int, char *);
static inline void init_eq(struct sge_eq *, int, int, uint8_t, uint16_t,
    char *);
static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *,
    bus_addr_t *, void **);
static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
    void *);
static int alloc_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *,
    int, int);
static int free_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *);
static int alloc_fwq(struct adapter *);
static int free_fwq(struct adapter *);
static int alloc_mgmtq(struct adapter *);
static int free_mgmtq(struct adapter *);
static int alloc_rxq(struct port_info *, struct sge_rxq *, int, int,
    struct sysctl_oid *);
static int free_rxq(struct port_info *, struct sge_rxq *);
#ifdef TCP_OFFLOAD
static int alloc_ofld_rxq(struct port_info *, struct sge_ofld_rxq *, int, int,
    struct sysctl_oid *);
static int free_ofld_rxq(struct port_info *, struct sge_ofld_rxq *);
#endif
static int ctrl_eq_alloc(struct adapter *, struct sge_eq *);
static int eth_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *);
#ifdef TCP_OFFLOAD
static int ofld_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *);
#endif
static int alloc_eq(struct adapter *, struct port_info *, struct sge_eq *);
static int free_eq(struct adapter *, struct sge_eq *);
static int alloc_wrq(struct adapter *, struct port_info *, struct sge_wrq *,
    struct sysctl_oid *);
static int free_wrq(struct adapter *, struct sge_wrq *);
static int alloc_txq(struct port_info *, struct sge_txq *, int,
    struct sysctl_oid *);
static int free_txq(struct port_info *, struct sge_txq *);
static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int);
static inline bool is_new_response(const struct sge_iq *, struct rsp_ctrl **);
static inline void iq_next(struct sge_iq *);
static inline void ring_fl_db(struct adapter *, struct sge_fl *);
static int refill_fl(struct adapter *, struct sge_fl *, int);
static void refill_sfl(void *);
static int alloc_fl_sdesc(struct sge_fl *);
static void free_fl_sdesc(struct sge_fl *);
static void set_fl_tag_idx(struct sge_fl *, int);
static void add_fl_to_sfl(struct adapter *, struct sge_fl *);

static int get_pkt_sgl(struct sge_txq *, struct mbuf **, struct sgl *, int);
static int free_pkt_sgl(struct sge_txq *, struct sgl *);
static int write_txpkt_wr(struct port_info *, struct sge_txq *, struct mbuf *,
    struct sgl *);
static int add_to_txpkts(struct port_info *, struct sge_txq *, struct txpkts *,
    struct mbuf *, struct sgl *);
static void write_txpkts_wr(struct sge_txq *, struct txpkts *);
static inline void write_ulp_cpl_sgl(struct port_info *, struct sge_txq *,
    struct txpkts *, struct mbuf *, struct sgl *);
static int write_sgl_to_txd(struct sge_eq *, struct sgl *, caddr_t *);
static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int);
static inline void ring_eq_db(struct adapter *, struct sge_eq *);
static inline int reclaimable(struct sge_eq *);
static int reclaim_tx_descs(struct sge_txq *, int, int);
static void write_eqflush_wr(struct sge_eq *);
static __be64 get_flit(bus_dma_segment_t *, int, int);
static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *,
    struct mbuf *);
static int handle_fw_rpl(struct sge_iq *, const struct rss_header *,
    struct mbuf *);

static int sysctl_uint16(SYSCTL_HANDLER_ARGS);

#if defined(__i386__) || defined(__amd64__)
extern u_int cpu_clflush_line_size;
#endif

/*
 * Called on MOD_LOAD and fills up fl_buf_info[].
 */
void
t4_sge_modload(void)
{
	int i;
	int bufsize[FL_BUF_SIZES] = {
		MCLBYTES,
#if MJUMPAGESIZE != MCLBYTES
		MJUMPAGESIZE,
#endif
		MJUM9BYTES,
		MJUM16BYTES
	};

	for (i = 0; i < FL_BUF_SIZES; i++) {
		FL_BUF_SIZE(i) = bufsize[i];
		FL_BUF_TYPE(i) = m_gettype(bufsize[i]);
		FL_BUF_ZONE(i) = m_getzone(bufsize[i]);
	}

#if defined(__i386__) || defined(__amd64__)
	fl_pad = max(cpu_clflush_line_size, 32);
	spg_len = cpu_clflush_line_size > 64 ? 128 : 64;
#endif
}
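/*
 * Illustration (typical amd64 setup with 4KB pages and 64 byte cache
 * lines) of what the loop above produces:
 *
 *	FL_BUF_SIZE(0) = 2048	(MCLBYTES)
 *	FL_BUF_SIZE(1) = 4096	(MJUMPAGESIZE)
 *	FL_BUF_SIZE(2) = 9216	(MJUM9BYTES)
 *	FL_BUF_SIZE(3) = 16384	(MJUM16BYTES)
 *
 * with the matching mbuf cluster type and UMA zone alongside each size,
 * and fl_pad = spg_len = 64.  set_fl_tag_idx() consults this table later
 * to pick a buffer size appropriate for a freelist's payload (e.g. the
 * port's MTU in init_fl()/t4_update_fl_bufsize()).
 */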
/**
 *	t4_sge_init - initialize SGE
 *	@sc: the adapter
 *
 *	Performs SGE initialization needed every time after a chip reset.
 *	We do not initialize any of the queues here, instead the driver
 *	top-level must request them individually.
 */
int
t4_sge_init(struct adapter *sc)
{
	struct sge *s = &sc->sge;
	int i, rc = 0;
	uint32_t ctrl_mask, ctrl_val, hpsize, v;

	ctrl_mask = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE |
	    V_INGPADBOUNDARY(M_INGPADBOUNDARY) |
	    F_EGRSTATUSPAGESIZE;
	ctrl_val = V_PKTSHIFT(FL_PKTSHIFT) | F_RXPKTCPLMODE |
	    V_INGPADBOUNDARY(ilog2(fl_pad) - 5) |
	    V_EGRSTATUSPAGESIZE(spg_len == 128);

	hpsize = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10);

	if (sc->flags & MASTER_PF) {
		int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200};
		int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */

		t4_set_reg_field(sc, A_SGE_CONTROL, ctrl_mask, ctrl_val);
		t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, hpsize);
		for (i = 0; i < FL_BUF_SIZES; i++) {
			t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i),
			    FL_BUF_SIZE(i));
		}

		t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD,
		    V_THRESHOLD_0(intr_pktcount[0]) |
		    V_THRESHOLD_1(intr_pktcount[1]) |
		    V_THRESHOLD_2(intr_pktcount[2]) |
		    V_THRESHOLD_3(intr_pktcount[3]));

		t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1,
		    V_TIMERVALUE0(us_to_core_ticks(sc, intr_timer[0])) |
		    V_TIMERVALUE1(us_to_core_ticks(sc, intr_timer[1])));
		t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3,
		    V_TIMERVALUE2(us_to_core_ticks(sc, intr_timer[2])) |
		    V_TIMERVALUE3(us_to_core_ticks(sc, intr_timer[3])));
		t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5,
		    V_TIMERVALUE4(us_to_core_ticks(sc, intr_timer[4])) |
		    V_TIMERVALUE5(us_to_core_ticks(sc, intr_timer[5])));
	}

	v = t4_read_reg(sc, A_SGE_CONTROL);
	if ((v & ctrl_mask) != ctrl_val) {
		device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", v);
		rc = EINVAL;
	}

	v = t4_read_reg(sc, A_SGE_HOST_PAGE_SIZE);
	if (v != hpsize) {
		device_printf(sc->dev, "invalid SGE_HOST_PAGE_SIZE(0x%x)\n", v);
		rc = EINVAL;
	}

	for (i = 0; i < FL_BUF_SIZES; i++) {
		v = t4_read_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i));
		if (v != FL_BUF_SIZE(i)) {
			device_printf(sc->dev,
			    "invalid SGE_FL_BUFFER_SIZE[%d](0x%x)\n", i, v);
			rc = EINVAL;
		}
	}

	v = t4_read_reg(sc, A_SGE_CONM_CTRL);
	s->fl_starve_threshold = G_EGRTHRESHOLD(v) * 2 + 1;

	v = t4_read_reg(sc, A_SGE_INGRESS_RX_THRESHOLD);
	sc->sge.counter_val[0] = G_THRESHOLD_0(v);
	sc->sge.counter_val[1] = G_THRESHOLD_1(v);
	sc->sge.counter_val[2] = G_THRESHOLD_2(v);
	sc->sge.counter_val[3] = G_THRESHOLD_3(v);

	v = t4_read_reg(sc, A_SGE_TIMER_VALUE_0_AND_1);
	sc->sge.timer_val[0] = G_TIMERVALUE0(v) / core_ticks_per_usec(sc);
	sc->sge.timer_val[1] = G_TIMERVALUE1(v) / core_ticks_per_usec(sc);
	v = t4_read_reg(sc, A_SGE_TIMER_VALUE_2_AND_3);
	sc->sge.timer_val[2] = G_TIMERVALUE2(v) / core_ticks_per_usec(sc);
	sc->sge.timer_val[3] = G_TIMERVALUE3(v) / core_ticks_per_usec(sc);
	v = t4_read_reg(sc, A_SGE_TIMER_VALUE_4_AND_5);
	sc->sge.timer_val[4] = G_TIMERVALUE4(v) / core_ticks_per_usec(sc);
	sc->sge.timer_val[5] = G_TIMERVALUE5(v) / core_ticks_per_usec(sc);

	t4_register_cpl_handler(sc, CPL_FW4_MSG, handle_fw_rpl);
	t4_register_cpl_handler(sc, CPL_FW6_MSG, handle_fw_rpl);
	t4_register_cpl_handler(sc, CPL_SGE_EGR_UPDATE, handle_sge_egr_update);
	t4_register_cpl_handler(sc, CPL_RX_PKT, t4_eth_rx);

	return (rc);
}
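/*
 * Worked example of the holdoff timer handling above (illustrative clock
 * rate): the master PF writes the six timers as 1, 5, 10, 50, 100 and
 * 200us converted to core-clock ticks, so with core_ticks_per_usec(sc)
 * hypothetically equal to 200, timer 1 is programmed as 5 * 200 = 1000
 * ticks.  Every PF then reads the registers back and divides by the same
 * factor, which is why sc->sge.timer_val[] ends up in microseconds and
 * sc->sge.counter_val[] holds the packet-count thresholds (1, 8, 16, 32)
 * no matter which PF did the programming.
 */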
int
t4_create_dma_tag(struct adapter *sc)
{
	int rc;

	rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE,
	    BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL,
	    NULL, &sc->dmat);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to create main DMA tag: %d\n", rc);
	}

	return (rc);
}

int
t4_destroy_dma_tag(struct adapter *sc)
{
	if (sc->dmat)
		bus_dma_tag_destroy(sc->dmat);

	return (0);
}

/*
 * Allocate and initialize the firmware event queue and the management queue.
 *
 * Returns errno on failure.  Resources allocated up to that point may still be
 * allocated.  Caller is responsible for cleanup in case this function fails.
 */
int
t4_setup_adapter_queues(struct adapter *sc)
{
	int rc;

	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

	sysctl_ctx_init(&sc->ctx);
	sc->flags |= ADAP_SYSCTL_CTX;

	/*
	 * Firmware event queue
	 */
	rc = alloc_fwq(sc);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to create firmware event queue: %d\n", rc);
		return (rc);
	}

	/*
	 * Management queue.  This is just a control queue that uses the fwq as
	 * its associated iq.
	 */
	rc = alloc_mgmtq(sc);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to create management queue: %d\n", rc);
		return (rc);
	}

	return (rc);
}

/*
 * Idempotent
 */
int
t4_teardown_adapter_queues(struct adapter *sc)
{

	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

	/* Do this before freeing the queue */
	if (sc->flags & ADAP_SYSCTL_CTX) {
		sysctl_ctx_free(&sc->ctx);
		sc->flags &= ~ADAP_SYSCTL_CTX;
	}

	free_mgmtq(sc);
	free_fwq(sc);

	return (0);
}
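/*
 * Example of the vector numbering that first_vector() below computes, for
 * a hypothetical two-port, NIC-only configuration with INTR_DIRECT set and
 * nrxq = 4 on each port:
 *
 *	vectors 0 .. T4_EXTRA_INTR-1	reserved for the adapter itself
 *					(the firmware event queue uses one of
 *					these, see alloc_fwq())
 *	next 4 vectors			port 0 rx queues
 *	next 4 vectors			port 1 rx queues
 *
 * so first_vector() returns T4_EXTRA_INTR for port 0 and T4_EXTRA_INTR + 4
 * for port 1.  With TCP_OFFLOAD and INTR_DIRECT each port contributes
 * nrxq + nofldrxq vectors instead; without INTR_DIRECT it contributes
 * max(nrxq, nofldrxq).
 */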
static inline int
first_vector(struct port_info *pi)
{
	struct adapter *sc = pi->adapter;
	int rc = T4_EXTRA_INTR, i;

	if (sc->intr_count == 1)
		return (0);

	for_each_port(sc, i) {
		if (i == pi->port_id)
			break;

#ifdef TCP_OFFLOAD
		if (sc->flags & INTR_DIRECT)
			rc += pi->nrxq + pi->nofldrxq;
		else
			rc += max(pi->nrxq, pi->nofldrxq);
#else
		/*
		 * Not compiled with offload support and intr_count > 1.  Only
		 * NIC queues exist and they'd better be taking direct
		 * interrupts.
		 */
		KASSERT(sc->flags & INTR_DIRECT,
		    ("%s: intr_count %d, !INTR_DIRECT", __func__,
		    sc->intr_count));

		rc += pi->nrxq;
#endif
	}

	return (rc);
}

/*
 * Given an arbitrary "index," come up with an iq that can be used by other
 * queues (of this port) for interrupt forwarding, SGE egress updates, etc.
 * The iq returned is guaranteed to be something that takes direct interrupts.
 */
static struct sge_iq *
port_intr_iq(struct port_info *pi, int idx)
{
	struct adapter *sc = pi->adapter;
	struct sge *s = &sc->sge;
	struct sge_iq *iq = NULL;

	if (sc->intr_count == 1)
		return (&sc->sge.fwq);

#ifdef TCP_OFFLOAD
	if (sc->flags & INTR_DIRECT) {
		idx %= pi->nrxq + pi->nofldrxq;

		if (idx >= pi->nrxq) {
			idx -= pi->nrxq;
			iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq;
		} else
			iq = &s->rxq[pi->first_rxq + idx].iq;

	} else {
		idx %= max(pi->nrxq, pi->nofldrxq);

		if (pi->nrxq >= pi->nofldrxq)
			iq = &s->rxq[pi->first_rxq + idx].iq;
		else
			iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq;
	}
#else
	/*
	 * Not compiled with offload support and intr_count > 1.  Only NIC
	 * queues exist and they'd better be taking direct interrupts.
466 */ 467 KASSERT(sc->flags & INTR_DIRECT, 468 ("%s: intr_count %d, !INTR_DIRECT", __func__, sc->intr_count)); 469 470 idx %= pi->nrxq; 471 iq = &s->rxq[pi->first_rxq + idx].iq; 472#endif 473 474 KASSERT(iq->flags & IQ_INTR, ("%s: EDOOFUS", __func__)); 475 return (iq); 476} 477 478int 479t4_setup_port_queues(struct port_info *pi) 480{ 481 int rc = 0, i, j, intr_idx, iqid; 482 struct sge_rxq *rxq; 483 struct sge_txq *txq; 484 struct sge_wrq *ctrlq; 485#ifdef TCP_OFFLOAD 486 struct sge_ofld_rxq *ofld_rxq; 487 struct sge_wrq *ofld_txq; 488 struct sysctl_oid *oid2 = NULL; 489#endif 490 char name[16]; 491 struct adapter *sc = pi->adapter; 492 struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev); 493 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 494 495 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "rxq", CTLFLAG_RD, 496 NULL, "rx queues"); 497 498#ifdef TCP_OFFLOAD 499 if (is_offload(sc)) { 500 oid2 = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_rxq", 501 CTLFLAG_RD, NULL, 502 "rx queues for offloaded TCP connections"); 503 } 504#endif 505 506 /* Interrupt vector to start from (when using multiple vectors) */ 507 intr_idx = first_vector(pi); 508 509 /* 510 * First pass over all rx queues (NIC and TOE): 511 * a) initialize iq and fl 512 * b) allocate queue iff it will take direct interrupts. 513 */ 514 for_each_rxq(pi, i, rxq) { 515 516 snprintf(name, sizeof(name), "%s rxq%d-iq", 517 device_get_nameunit(pi->dev), i); 518 init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, pi->qsize_rxq, 519 RX_IQ_ESIZE, name); 520 521 snprintf(name, sizeof(name), "%s rxq%d-fl", 522 device_get_nameunit(pi->dev), i); 523 init_fl(&rxq->fl, pi->qsize_rxq / 8, pi->ifp->if_mtu, name); 524 525 if (sc->flags & INTR_DIRECT 526#ifdef TCP_OFFLOAD 527 || (sc->intr_count > 1 && pi->nrxq >= pi->nofldrxq) 528#endif 529 ) { 530 rxq->iq.flags |= IQ_INTR; 531 rc = alloc_rxq(pi, rxq, intr_idx, i, oid); 532 if (rc != 0) 533 goto done; 534 intr_idx++; 535 } 536 } 537 538#ifdef TCP_OFFLOAD 539 for_each_ofld_rxq(pi, i, ofld_rxq) { 540 541 snprintf(name, sizeof(name), "%s ofld_rxq%d-iq", 542 device_get_nameunit(pi->dev), i); 543 init_iq(&ofld_rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, 544 pi->qsize_rxq, RX_IQ_ESIZE, name); 545 546 snprintf(name, sizeof(name), "%s ofld_rxq%d-fl", 547 device_get_nameunit(pi->dev), i); 548 init_fl(&ofld_rxq->fl, pi->qsize_rxq / 8, MJUM16BYTES, name); 549 550 if (sc->flags & INTR_DIRECT || 551 (sc->intr_count > 1 && pi->nofldrxq > pi->nrxq)) { 552 ofld_rxq->iq.flags |= IQ_INTR; 553 rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx, i, oid2); 554 if (rc != 0) 555 goto done; 556 intr_idx++; 557 } 558 } 559#endif 560 561 /* 562 * Second pass over all rx queues (NIC and TOE). The queues forwarding 563 * their interrupts are allocated now. 564 */ 565 j = 0; 566 for_each_rxq(pi, i, rxq) { 567 if (rxq->iq.flags & IQ_INTR) 568 continue; 569 570 intr_idx = port_intr_iq(pi, j)->abs_id; 571 572 rc = alloc_rxq(pi, rxq, intr_idx, i, oid); 573 if (rc != 0) 574 goto done; 575 j++; 576 } 577 578#ifdef TCP_OFFLOAD 579 for_each_ofld_rxq(pi, i, ofld_rxq) { 580 if (ofld_rxq->iq.flags & IQ_INTR) 581 continue; 582 583 intr_idx = port_intr_iq(pi, j)->abs_id; 584 585 rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx, i, oid2); 586 if (rc != 0) 587 goto done; 588 j++; 589 } 590#endif 591 592 /* 593 * Now the tx queues. Only one pass needed. 
594 */ 595 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "txq", CTLFLAG_RD, 596 NULL, "tx queues"); 597 j = 0; 598 for_each_txq(pi, i, txq) { 599 uint16_t iqid; 600 601 iqid = port_intr_iq(pi, j)->cntxt_id; 602 603 snprintf(name, sizeof(name), "%s txq%d", 604 device_get_nameunit(pi->dev), i); 605 init_eq(&txq->eq, EQ_ETH, pi->qsize_txq, pi->tx_chan, iqid, 606 name); 607 608 rc = alloc_txq(pi, txq, i, oid); 609 if (rc != 0) 610 goto done; 611 j++; 612 } 613 614#ifdef TCP_OFFLOAD 615 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_txq", 616 CTLFLAG_RD, NULL, "tx queues for offloaded TCP connections"); 617 for_each_ofld_txq(pi, i, ofld_txq) { 618 uint16_t iqid; 619 620 iqid = port_intr_iq(pi, j)->cntxt_id; 621 622 snprintf(name, sizeof(name), "%s ofld_txq%d", 623 device_get_nameunit(pi->dev), i); 624 init_eq(&ofld_txq->eq, EQ_OFLD, pi->qsize_txq, pi->tx_chan, 625 iqid, name); 626 627 snprintf(name, sizeof(name), "%d", i); 628 oid2 = SYSCTL_ADD_NODE(&pi->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, 629 name, CTLFLAG_RD, NULL, "offload tx queue"); 630 631 rc = alloc_wrq(sc, pi, ofld_txq, oid2); 632 if (rc != 0) 633 goto done; 634 j++; 635 } 636#endif 637 638 /* 639 * Finally, the control queue. 640 */ 641 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ctrlq", CTLFLAG_RD, 642 NULL, "ctrl queue"); 643 ctrlq = &sc->sge.ctrlq[pi->port_id]; 644 iqid = port_intr_iq(pi, 0)->cntxt_id; 645 snprintf(name, sizeof(name), "%s ctrlq", device_get_nameunit(pi->dev)); 646 init_eq(&ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, pi->tx_chan, iqid, name); 647 rc = alloc_wrq(sc, pi, ctrlq, oid); 648 649done: 650 if (rc) 651 t4_teardown_port_queues(pi); 652 653 return (rc); 654} 655 656/* 657 * Idempotent 658 */ 659int 660t4_teardown_port_queues(struct port_info *pi) 661{ 662 int i; 663 struct adapter *sc = pi->adapter; 664 struct sge_rxq *rxq; 665 struct sge_txq *txq; 666#ifdef TCP_OFFLOAD 667 struct sge_ofld_rxq *ofld_rxq; 668 struct sge_wrq *ofld_txq; 669#endif 670 671 /* Do this before freeing the queues */ 672 if (pi->flags & PORT_SYSCTL_CTX) { 673 sysctl_ctx_free(&pi->ctx); 674 pi->flags &= ~PORT_SYSCTL_CTX; 675 } 676 677 /* 678 * Take down all the tx queues first, as they reference the rx queues 679 * (for egress updates, etc.). 680 */ 681 682 free_wrq(sc, &sc->sge.ctrlq[pi->port_id]); 683 684 for_each_txq(pi, i, txq) { 685 free_txq(pi, txq); 686 } 687 688#ifdef TCP_OFFLOAD 689 for_each_ofld_txq(pi, i, ofld_txq) { 690 free_wrq(sc, ofld_txq); 691 } 692#endif 693 694 /* 695 * Then take down the rx queues that forward their interrupts, as they 696 * reference other rx queues. 697 */ 698 699 for_each_rxq(pi, i, rxq) { 700 if ((rxq->iq.flags & IQ_INTR) == 0) 701 free_rxq(pi, rxq); 702 } 703 704#ifdef TCP_OFFLOAD 705 for_each_ofld_rxq(pi, i, ofld_rxq) { 706 if ((ofld_rxq->iq.flags & IQ_INTR) == 0) 707 free_ofld_rxq(pi, ofld_rxq); 708 } 709#endif 710 711 /* 712 * Then take down the rx queues that take direct interrupts. 713 */ 714 715 for_each_rxq(pi, i, rxq) { 716 if (rxq->iq.flags & IQ_INTR) 717 free_rxq(pi, rxq); 718 } 719 720#ifdef TCP_OFFLOAD 721 for_each_ofld_rxq(pi, i, ofld_rxq) { 722 if (ofld_rxq->iq.flags & IQ_INTR) 723 free_ofld_rxq(pi, ofld_rxq); 724 } 725#endif 726 727 return (0); 728} 729 730/* 731 * Deals with errors and the firmware event queue. All data rx queues forward 732 * their interrupt to the firmware event queue. 
733 */ 734void 735t4_intr_all(void *arg) 736{ 737 struct adapter *sc = arg; 738 struct sge_iq *fwq = &sc->sge.fwq; 739 740 t4_intr_err(arg); 741 if (atomic_cmpset_int(&fwq->state, IQS_IDLE, IQS_BUSY)) { 742 service_iq(fwq, 0); 743 atomic_cmpset_int(&fwq->state, IQS_BUSY, IQS_IDLE); 744 } 745} 746 747/* Deals with error interrupts */ 748void 749t4_intr_err(void *arg) 750{ 751 struct adapter *sc = arg; 752 753 t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0); 754 t4_slow_intr_handler(sc); 755} 756 757void 758t4_intr_evt(void *arg) 759{ 760 struct sge_iq *iq = arg; 761 762 if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) { 763 service_iq(iq, 0); 764 atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE); 765 } 766} 767 768void 769t4_intr(void *arg) 770{ 771 struct sge_iq *iq = arg; 772 773 if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) { 774 service_iq(iq, 0); 775 atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE); 776 } 777} 778 779/* 780 * Deals with anything and everything on the given ingress queue. 781 */ 782static int 783service_iq(struct sge_iq *iq, int budget) 784{ 785 struct sge_iq *q; 786 struct sge_rxq *rxq = iq_to_rxq(iq); /* Use iff iq is part of rxq */ 787 struct sge_fl *fl = &rxq->fl; /* Use iff IQ_HAS_FL */ 788 struct adapter *sc = iq->adapter; 789 struct rsp_ctrl *ctrl; 790 const struct rss_header *rss; 791 int ndescs = 0, limit, fl_bufs_used = 0; 792 int rsp_type; 793 uint32_t lq; 794 struct mbuf *m0; 795 STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql); 796 797 limit = budget ? budget : iq->qsize / 8; 798 799 KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq)); 800 801 /* 802 * We always come back and check the descriptor ring for new indirect 803 * interrupts and other responses after running a single handler. 804 */ 805 for (;;) { 806 while (is_new_response(iq, &ctrl)) { 807 808 rmb(); 809 810 m0 = NULL; 811 rsp_type = G_RSPD_TYPE(ctrl->u.type_gen); 812 lq = be32toh(ctrl->pldbuflen_qid); 813 rss = (const void *)iq->cdesc; 814 815 switch (rsp_type) { 816 case X_RSPD_TYPE_FLBUF: 817 818 KASSERT(iq->flags & IQ_HAS_FL, 819 ("%s: data for an iq (%p) with no freelist", 820 __func__, iq)); 821 822 m0 = get_fl_payload(sc, fl, lq, &fl_bufs_used); 823#ifdef T4_PKT_TIMESTAMP 824 /* 825 * 60 bit timestamp for the payload is 826 * *(uint64_t *)m0->m_pktdat. Note that it is 827 * in the leading free-space in the mbuf. The 828 * kernel can clobber it during a pullup, 829 * m_copymdata, etc. You need to make sure that 830 * the mbuf reaches you unmolested if you care 831 * about the timestamp. 832 */ 833 *(uint64_t *)m0->m_pktdat = 834 be64toh(ctrl->u.last_flit) & 835 0xfffffffffffffff; 836#endif 837 838 /* fall through */ 839 840 case X_RSPD_TYPE_CPL: 841 KASSERT(rss->opcode < NUM_CPL_CMDS, 842 ("%s: bad opcode %02x.", __func__, 843 rss->opcode)); 844 sc->cpl_handler[rss->opcode](iq, rss, m0); 845 break; 846 847 case X_RSPD_TYPE_INTR: 848 849 /* 850 * Interrupts should be forwarded only to queues 851 * that are not forwarding their interrupts. 852 * This means service_iq can recurse but only 1 853 * level deep. 
854 */ 855 KASSERT(budget == 0, 856 ("%s: budget %u, rsp_type %u", __func__, 857 budget, rsp_type)); 858 859 q = sc->sge.iqmap[lq - sc->sge.iq_start]; 860 if (atomic_cmpset_int(&q->state, IQS_IDLE, 861 IQS_BUSY)) { 862 if (service_iq(q, q->qsize / 8) == 0) { 863 atomic_cmpset_int(&q->state, 864 IQS_BUSY, IQS_IDLE); 865 } else { 866 STAILQ_INSERT_TAIL(&iql, q, 867 link); 868 } 869 } 870 break; 871 872 default: 873 sc->an_handler(iq, ctrl); 874 break; 875 } 876 877 iq_next(iq); 878 if (++ndescs == limit) { 879 t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), 880 V_CIDXINC(ndescs) | 881 V_INGRESSQID(iq->cntxt_id) | 882 V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); 883 ndescs = 0; 884 885 if (fl_bufs_used > 0) { 886 FL_LOCK(fl); 887 fl->needed += fl_bufs_used; 888 refill_fl(sc, fl, fl->cap / 8); 889 FL_UNLOCK(fl); 890 fl_bufs_used = 0; 891 } 892 893 if (budget) 894 return (EINPROGRESS); 895 } 896 } 897 898 if (STAILQ_EMPTY(&iql)) 899 break; 900 901 /* 902 * Process the head only, and send it to the back of the list if 903 * it's still not done. 904 */ 905 q = STAILQ_FIRST(&iql); 906 STAILQ_REMOVE_HEAD(&iql, link); 907 if (service_iq(q, q->qsize / 8) == 0) 908 atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE); 909 else 910 STAILQ_INSERT_TAIL(&iql, q, link); 911 } 912 913#if defined(INET) || defined(INET6) 914 if (iq->flags & IQ_LRO_ENABLED) { 915 struct lro_ctrl *lro = &rxq->lro; 916 struct lro_entry *l; 917 918 while (!SLIST_EMPTY(&lro->lro_active)) { 919 l = SLIST_FIRST(&lro->lro_active); 920 SLIST_REMOVE_HEAD(&lro->lro_active, next); 921 tcp_lro_flush(lro, l); 922 } 923 } 924#endif 925 926 t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) | 927 V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params)); 928 929 if (iq->flags & IQ_HAS_FL) { 930 int starved; 931 932 FL_LOCK(fl); 933 fl->needed += fl_bufs_used; 934 starved = refill_fl(sc, fl, fl->cap / 4); 935 FL_UNLOCK(fl); 936 if (__predict_false(starved != 0)) 937 add_fl_to_sfl(sc, fl); 938 } 939 940 return (0); 941} 942 943 944#ifdef T4_PKT_TIMESTAMP 945#define RX_COPY_THRESHOLD (MINCLSIZE - 8) 946#else 947#define RX_COPY_THRESHOLD MINCLSIZE 948#endif 949 950static struct mbuf * 951get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf, 952 int *fl_bufs_used) 953{ 954 struct mbuf *m0, *m; 955 struct fl_sdesc *sd = &fl->sdesc[fl->cidx]; 956 unsigned int nbuf, len; 957 958 /* 959 * No assertion for the fl lock because we don't need it. This routine 960 * is called only from the rx interrupt handler and it only updates 961 * fl->cidx. (Contrast that with fl->pidx/fl->needed which could be 962 * updated in the rx interrupt handler or the starvation helper routine. 963 * That's why code that manipulates fl->pidx/fl->needed needs the fl 964 * lock but this routine does not). 
965 */ 966 967 if (__predict_false((len_newbuf & F_RSPD_NEWBUF) == 0)) 968 panic("%s: cannot handle packed frames", __func__); 969 len = G_RSPD_LEN(len_newbuf); 970 971 m0 = sd->m; 972 sd->m = NULL; /* consumed */ 973 974 bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map, BUS_DMASYNC_POSTREAD); 975 m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR); 976#ifdef T4_PKT_TIMESTAMP 977 /* Leave room for a timestamp */ 978 m0->m_data += 8; 979#endif 980 981 if (len < RX_COPY_THRESHOLD) { 982 /* copy data to mbuf, buffer will be recycled */ 983 bcopy(sd->cl, mtod(m0, caddr_t), len); 984 m0->m_len = len; 985 } else { 986 bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map); 987 m_cljset(m0, sd->cl, FL_BUF_TYPE(sd->tag_idx)); 988 sd->cl = NULL; /* consumed */ 989 m0->m_len = min(len, FL_BUF_SIZE(sd->tag_idx)); 990 } 991 m0->m_pkthdr.len = len; 992 993 sd++; 994 if (__predict_false(++fl->cidx == fl->cap)) { 995 sd = fl->sdesc; 996 fl->cidx = 0; 997 } 998 999 m = m0; 1000 len -= m->m_len; 1001 nbuf = 1; /* # of fl buffers used */ 1002 1003 while (len > 0) { 1004 m->m_next = sd->m; 1005 sd->m = NULL; /* consumed */ 1006 m = m->m_next; 1007 1008 bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map, 1009 BUS_DMASYNC_POSTREAD); 1010 1011 m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0); 1012 if (len <= MLEN) { 1013 bcopy(sd->cl, mtod(m, caddr_t), len); 1014 m->m_len = len; 1015 } else { 1016 bus_dmamap_unload(fl->tag[sd->tag_idx], 1017 sd->map); 1018 m_cljset(m, sd->cl, FL_BUF_TYPE(sd->tag_idx)); 1019 sd->cl = NULL; /* consumed */ 1020 m->m_len = min(len, FL_BUF_SIZE(sd->tag_idx)); 1021 } 1022 1023 sd++; 1024 if (__predict_false(++fl->cidx == fl->cap)) { 1025 sd = fl->sdesc; 1026 fl->cidx = 0; 1027 } 1028 1029 len -= m->m_len; 1030 nbuf++; 1031 } 1032 1033 (*fl_bufs_used) += nbuf; 1034 1035 return (m0); 1036} 1037 1038static int 1039t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0) 1040{ 1041 struct sge_rxq *rxq = iq_to_rxq(iq); 1042 struct ifnet *ifp = rxq->ifp; 1043 const struct cpl_rx_pkt *cpl = (const void *)(rss + 1); 1044#if defined(INET) || defined(INET6) 1045 struct lro_ctrl *lro = &rxq->lro; 1046#endif 1047 1048 KASSERT(m0 != NULL, ("%s: no payload with opcode %02x", __func__, 1049 rss->opcode)); 1050 1051 m0->m_pkthdr.len -= FL_PKTSHIFT; 1052 m0->m_len -= FL_PKTSHIFT; 1053 m0->m_data += FL_PKTSHIFT; 1054 1055 m0->m_pkthdr.rcvif = ifp; 1056 m0->m_flags |= M_FLOWID; 1057 m0->m_pkthdr.flowid = rss->hash_val; 1058 1059 if (cpl->csum_calc && !cpl->err_vec) { 1060 if (ifp->if_capenable & IFCAP_RXCSUM && 1061 cpl->l2info & htobe32(F_RXF_IP)) { 1062 m0->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | 1063 CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 1064 rxq->rxcsum++; 1065 } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 && 1066 cpl->l2info & htobe32(F_RXF_IP6)) { 1067 m0->m_pkthdr.csum_flags |= (CSUM_DATA_VALID_IPV6 | 1068 CSUM_PSEUDO_HDR); 1069 rxq->rxcsum++; 1070 } 1071 1072 if (__predict_false(cpl->ip_frag)) 1073 m0->m_pkthdr.csum_data = be16toh(cpl->csum); 1074 else 1075 m0->m_pkthdr.csum_data = 0xffff; 1076 } 1077 1078 if (cpl->vlan_ex) { 1079 m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan); 1080 m0->m_flags |= M_VLANTAG; 1081 rxq->vlan_extraction++; 1082 } 1083 1084#if defined(INET) || defined(INET6) 1085 if (cpl->l2info & htobe32(F_RXF_LRO) && 1086 iq->flags & IQ_LRO_ENABLED && 1087 tcp_lro_rx(lro, m0, 0) == 0) { 1088 /* queued for LRO */ 1089 } else 1090#endif 1091 ifp->if_input(ifp, m0); 1092 1093 return (0); 1094} 1095 1096/* 1097 * Doesn't fail. 
Holds on to work requests it can't send right away. 1098 */ 1099void 1100t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct wrqe *wr) 1101{ 1102 struct sge_eq *eq = &wrq->eq; 1103 int can_reclaim; 1104 caddr_t dst; 1105 1106 TXQ_LOCK_ASSERT_OWNED(wrq); 1107#ifdef TCP_OFFLOAD 1108 KASSERT((eq->flags & EQ_TYPEMASK) == EQ_OFLD || 1109 (eq->flags & EQ_TYPEMASK) == EQ_CTRL, 1110 ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK)); 1111#else 1112 KASSERT((eq->flags & EQ_TYPEMASK) == EQ_CTRL, 1113 ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK)); 1114#endif 1115 1116 if (__predict_true(wr != NULL)) 1117 STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link); 1118 1119 can_reclaim = reclaimable(eq); 1120 if (__predict_false(eq->flags & EQ_STALLED)) { 1121 if (can_reclaim < tx_resume_threshold(eq)) 1122 return; 1123 eq->flags &= ~EQ_STALLED; 1124 eq->unstalled++; 1125 } 1126 eq->cidx += can_reclaim; 1127 eq->avail += can_reclaim; 1128 if (__predict_false(eq->cidx >= eq->cap)) 1129 eq->cidx -= eq->cap; 1130 1131 while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL) { 1132 int ndesc; 1133 1134 if (__predict_false(wr->wr_len < 0 || 1135 wr->wr_len > SGE_MAX_WR_LEN || (wr->wr_len & 0x7))) { 1136 1137#ifdef INVARIANTS 1138 panic("%s: work request with length %d", __func__, 1139 wr->wr_len); 1140#endif 1141#ifdef KDB 1142 kdb_backtrace(); 1143#endif 1144 log(LOG_ERR, "%s: %s work request with length %d", 1145 device_get_nameunit(sc->dev), __func__, wr->wr_len); 1146 STAILQ_REMOVE_HEAD(&wrq->wr_list, link); 1147 free_wrqe(wr); 1148 continue; 1149 } 1150 1151 ndesc = howmany(wr->wr_len, EQ_ESIZE); 1152 if (eq->avail < ndesc) { 1153 wrq->no_desc++; 1154 break; 1155 } 1156 1157 dst = (void *)&eq->desc[eq->pidx]; 1158 copy_to_txd(eq, wrtod(wr), &dst, wr->wr_len); 1159 1160 eq->pidx += ndesc; 1161 eq->avail -= ndesc; 1162 if (__predict_false(eq->pidx >= eq->cap)) 1163 eq->pidx -= eq->cap; 1164 1165 eq->pending += ndesc; 1166 if (eq->pending > 16) 1167 ring_eq_db(sc, eq); 1168 1169 wrq->tx_wrs++; 1170 STAILQ_REMOVE_HEAD(&wrq->wr_list, link); 1171 free_wrqe(wr); 1172 1173 if (eq->avail < 8) { 1174 can_reclaim = reclaimable(eq); 1175 eq->cidx += can_reclaim; 1176 eq->avail += can_reclaim; 1177 if (__predict_false(eq->cidx >= eq->cap)) 1178 eq->cidx -= eq->cap; 1179 } 1180 } 1181 1182 if (eq->pending) 1183 ring_eq_db(sc, eq); 1184 1185 if (wr != NULL) { 1186 eq->flags |= EQ_STALLED; 1187 if (callout_pending(&eq->tx_callout) == 0) 1188 callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq); 1189 } 1190} 1191 1192/* Per-packet header in a coalesced tx WR, before the SGL starts (in flits) */ 1193#define TXPKTS_PKT_HDR ((\ 1194 sizeof(struct ulp_txpkt) + \ 1195 sizeof(struct ulptx_idata) + \ 1196 sizeof(struct cpl_tx_pkt_core) \ 1197 ) / 8) 1198 1199/* Header of a coalesced tx WR, before SGL of first packet (in flits) */ 1200#define TXPKTS_WR_HDR (\ 1201 sizeof(struct fw_eth_tx_pkts_wr) / 8 + \ 1202 TXPKTS_PKT_HDR) 1203 1204/* Header of a tx WR, before SGL of first packet (in flits) */ 1205#define TXPKT_WR_HDR ((\ 1206 sizeof(struct fw_eth_tx_pkt_wr) + \ 1207 sizeof(struct cpl_tx_pkt_core) \ 1208 ) / 8 ) 1209 1210/* Header of a tx LSO WR, before SGL of first packet (in flits) */ 1211#define TXPKT_LSO_WR_HDR ((\ 1212 sizeof(struct fw_eth_tx_pkt_wr) + \ 1213 sizeof(struct cpl_tx_pkt_lso_core) + \ 1214 sizeof(struct cpl_tx_pkt_core) \ 1215 ) / 8 ) 1216 1217int 1218t4_eth_tx(struct ifnet *ifp, struct sge_txq *txq, struct mbuf *m) 1219{ 1220 struct port_info *pi = (void *)ifp->if_softc; 1221 struct adapter *sc = 
pi->adapter; 1222 struct sge_eq *eq = &txq->eq; 1223 struct buf_ring *br = txq->br; 1224 struct mbuf *next; 1225 int rc, coalescing, can_reclaim; 1226 struct txpkts txpkts; 1227 struct sgl sgl; 1228 1229 TXQ_LOCK_ASSERT_OWNED(txq); 1230 KASSERT(m, ("%s: called with nothing to do.", __func__)); 1231 KASSERT((eq->flags & EQ_TYPEMASK) == EQ_ETH, 1232 ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK)); 1233 1234 prefetch(&eq->desc[eq->pidx]); 1235 prefetch(&txq->sdesc[eq->pidx]); 1236 1237 txpkts.npkt = 0;/* indicates there's nothing in txpkts */ 1238 coalescing = 0; 1239 1240 can_reclaim = reclaimable(eq); 1241 if (__predict_false(eq->flags & EQ_STALLED)) { 1242 if (can_reclaim < tx_resume_threshold(eq)) { 1243 txq->m = m; 1244 return (0); 1245 } 1246 eq->flags &= ~EQ_STALLED; 1247 eq->unstalled++; 1248 } 1249 1250 if (__predict_false(eq->flags & EQ_DOOMED)) { 1251 m_freem(m); 1252 while ((m = buf_ring_dequeue_sc(txq->br)) != NULL) 1253 m_freem(m); 1254 return (ENETDOWN); 1255 } 1256 1257 if (eq->avail < 8 && can_reclaim) 1258 reclaim_tx_descs(txq, can_reclaim, 32); 1259 1260 for (; m; m = next ? next : drbr_dequeue(ifp, br)) { 1261 1262 if (eq->avail < 8) 1263 break; 1264 1265 next = m->m_nextpkt; 1266 m->m_nextpkt = NULL; 1267 1268 if (next || buf_ring_peek(br)) 1269 coalescing = 1; 1270 1271 rc = get_pkt_sgl(txq, &m, &sgl, coalescing); 1272 if (rc != 0) { 1273 if (rc == ENOMEM) { 1274 1275 /* Short of resources, suspend tx */ 1276 1277 m->m_nextpkt = next; 1278 break; 1279 } 1280 1281 /* 1282 * Unrecoverable error for this packet, throw it away 1283 * and move on to the next. get_pkt_sgl may already 1284 * have freed m (it will be NULL in that case and the 1285 * m_freem here is still safe). 1286 */ 1287 1288 m_freem(m); 1289 continue; 1290 } 1291 1292 if (coalescing && 1293 add_to_txpkts(pi, txq, &txpkts, m, &sgl) == 0) { 1294 1295 /* Successfully absorbed into txpkts */ 1296 1297 write_ulp_cpl_sgl(pi, txq, &txpkts, m, &sgl); 1298 goto doorbell; 1299 } 1300 1301 /* 1302 * We weren't coalescing to begin with, or current frame could 1303 * not be coalesced (add_to_txpkts flushes txpkts if a frame 1304 * given to it can't be coalesced). Either way there should be 1305 * nothing in txpkts. 1306 */ 1307 KASSERT(txpkts.npkt == 0, 1308 ("%s: txpkts not empty: %d", __func__, txpkts.npkt)); 1309 1310 /* We're sending out individual packets now */ 1311 coalescing = 0; 1312 1313 if (eq->avail < 8) 1314 reclaim_tx_descs(txq, 0, 8); 1315 rc = write_txpkt_wr(pi, txq, m, &sgl); 1316 if (rc != 0) { 1317 1318 /* Short of hardware descriptors, suspend tx */ 1319 1320 /* 1321 * This is an unlikely but expensive failure. We've 1322 * done all the hard work (DMA mappings etc.) and now we 1323 * can't send out the packet. What's worse, we have to 1324 * spend even more time freeing up everything in sgl. 1325 */ 1326 txq->no_desc++; 1327 free_pkt_sgl(txq, &sgl); 1328 1329 m->m_nextpkt = next; 1330 break; 1331 } 1332 1333 ETHER_BPF_MTAP(ifp, m); 1334 if (sgl.nsegs == 0) 1335 m_freem(m); 1336doorbell: 1337 if (eq->pending >= 64) 1338 ring_eq_db(sc, eq); 1339 1340 can_reclaim = reclaimable(eq); 1341 if (can_reclaim >= 32) 1342 reclaim_tx_descs(txq, can_reclaim, 64); 1343 } 1344 1345 if (txpkts.npkt > 0) 1346 write_txpkts_wr(txq, &txpkts); 1347 1348 /* 1349 * m not NULL means there was an error but we haven't thrown it away. 1350 * This can happen when we're short of tx descriptors (no_desc) or maybe 1351 * even DMA maps (no_dmamap). Either way, a credit flush and reclaim 1352 * will get things going again. 
1353 */ 1354 if (m && !(eq->flags & EQ_CRFLUSHED)) { 1355 struct tx_sdesc *txsd = &txq->sdesc[eq->pidx]; 1356 1357 /* 1358 * If EQ_CRFLUSHED is not set then we know we have at least one 1359 * available descriptor because any WR that reduces eq->avail to 1360 * 0 also sets EQ_CRFLUSHED. 1361 */ 1362 KASSERT(eq->avail > 0, ("%s: no space for eqflush.", __func__)); 1363 1364 txsd->desc_used = 1; 1365 txsd->credits = 0; 1366 write_eqflush_wr(eq); 1367 } 1368 txq->m = m; 1369 1370 if (eq->pending) 1371 ring_eq_db(sc, eq); 1372 1373 reclaim_tx_descs(txq, 0, 128); 1374 1375 if (eq->flags & EQ_STALLED && callout_pending(&eq->tx_callout) == 0) 1376 callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq); 1377 1378 return (0); 1379} 1380 1381void 1382t4_update_fl_bufsize(struct ifnet *ifp) 1383{ 1384 struct port_info *pi = ifp->if_softc; 1385 struct sge_rxq *rxq; 1386 struct sge_fl *fl; 1387 int i, bufsize; 1388 1389 /* large enough for a frame even when VLAN extraction is disabled */ 1390 bufsize = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ifp->if_mtu; 1391 bufsize = roundup(bufsize + FL_PKTSHIFT, fl_pad); 1392 for_each_rxq(pi, i, rxq) { 1393 fl = &rxq->fl; 1394 1395 FL_LOCK(fl); 1396 set_fl_tag_idx(fl, bufsize); 1397 FL_UNLOCK(fl); 1398 } 1399} 1400 1401int 1402can_resume_tx(struct sge_eq *eq) 1403{ 1404 return (reclaimable(eq) >= tx_resume_threshold(eq)); 1405} 1406 1407static inline void 1408init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx, 1409 int qsize, int esize, char *name) 1410{ 1411 KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS, 1412 ("%s: bad tmr_idx %d", __func__, tmr_idx)); 1413 KASSERT(pktc_idx < SGE_NCOUNTERS, /* -ve is ok, means don't use */ 1414 ("%s: bad pktc_idx %d", __func__, pktc_idx)); 1415 1416 iq->flags = 0; 1417 iq->adapter = sc; 1418 iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx); 1419 iq->intr_pktc_idx = SGE_NCOUNTERS - 1; 1420 if (pktc_idx >= 0) { 1421 iq->intr_params |= F_QINTR_CNT_EN; 1422 iq->intr_pktc_idx = pktc_idx; 1423 } 1424 iq->qsize = roundup(qsize, 16); /* See FW_IQ_CMD/iqsize */ 1425 iq->esize = max(esize, 16); /* See FW_IQ_CMD/iqesize */ 1426 strlcpy(iq->lockname, name, sizeof(iq->lockname)); 1427} 1428 1429static inline void 1430init_fl(struct sge_fl *fl, int qsize, int bufsize, char *name) 1431{ 1432 fl->qsize = qsize; 1433 strlcpy(fl->lockname, name, sizeof(fl->lockname)); 1434 set_fl_tag_idx(fl, bufsize); 1435} 1436 1437static inline void 1438init_eq(struct sge_eq *eq, int eqtype, int qsize, uint8_t tx_chan, 1439 uint16_t iqid, char *name) 1440{ 1441 KASSERT(tx_chan < NCHAN, ("%s: bad tx channel %d", __func__, tx_chan)); 1442 KASSERT(eqtype <= EQ_TYPEMASK, ("%s: bad qtype %d", __func__, eqtype)); 1443 1444 eq->flags = eqtype & EQ_TYPEMASK; 1445 eq->tx_chan = tx_chan; 1446 eq->iqid = iqid; 1447 eq->qsize = qsize; 1448 strlcpy(eq->lockname, name, sizeof(eq->lockname)); 1449 1450 TASK_INIT(&eq->tx_task, 0, t4_tx_task, eq); 1451 callout_init(&eq->tx_callout, CALLOUT_MPSAFE); 1452} 1453 1454static int 1455alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag, 1456 bus_dmamap_t *map, bus_addr_t *pa, void **va) 1457{ 1458 int rc; 1459 1460 rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR, 1461 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag); 1462 if (rc != 0) { 1463 device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc); 1464 goto done; 1465 } 1466 1467 rc = bus_dmamem_alloc(*tag, va, 1468 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map); 1469 if (rc != 0) { 1470 device_printf(sc->dev, "cannot 
allocate DMA memory: %d\n", rc); 1471 goto done; 1472 } 1473 1474 rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0); 1475 if (rc != 0) { 1476 device_printf(sc->dev, "cannot load DMA map: %d\n", rc); 1477 goto done; 1478 } 1479done: 1480 if (rc) 1481 free_ring(sc, *tag, *map, *pa, *va); 1482 1483 return (rc); 1484} 1485 1486static int 1487free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map, 1488 bus_addr_t pa, void *va) 1489{ 1490 if (pa) 1491 bus_dmamap_unload(tag, map); 1492 if (va) 1493 bus_dmamem_free(tag, va, map); 1494 if (tag) 1495 bus_dma_tag_destroy(tag); 1496 1497 return (0); 1498} 1499 1500/* 1501 * Allocates the ring for an ingress queue and an optional freelist. If the 1502 * freelist is specified it will be allocated and then associated with the 1503 * ingress queue. 1504 * 1505 * Returns errno on failure. Resources allocated up to that point may still be 1506 * allocated. Caller is responsible for cleanup in case this function fails. 1507 * 1508 * If the ingress queue will take interrupts directly (iq->flags & IQ_INTR) then 1509 * the intr_idx specifies the vector, starting from 0. Otherwise it specifies 1510 * the abs_id of the ingress queue to which its interrupts should be forwarded. 1511 */ 1512static int 1513alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl, 1514 int intr_idx, int cong) 1515{ 1516 int rc, i, cntxt_id; 1517 size_t len; 1518 struct fw_iq_cmd c; 1519 struct adapter *sc = iq->adapter; 1520 __be32 v = 0; 1521 1522 len = iq->qsize * iq->esize; 1523 rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba, 1524 (void **)&iq->desc); 1525 if (rc != 0) 1526 return (rc); 1527 1528 bzero(&c, sizeof(c)); 1529 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST | 1530 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) | 1531 V_FW_IQ_CMD_VFN(0)); 1532 1533 c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART | 1534 FW_LEN16(c)); 1535 1536 /* Special handling for firmware event queue */ 1537 if (iq == &sc->sge.fwq) 1538 v |= F_FW_IQ_CMD_IQASYNCH; 1539 1540 if (iq->flags & IQ_INTR) { 1541 KASSERT(intr_idx < sc->intr_count, 1542 ("%s: invalid direct intr_idx %d", __func__, intr_idx)); 1543 } else 1544 v |= F_FW_IQ_CMD_IQANDST; 1545 v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx); 1546 1547 c.type_to_iqandstindex = htobe32(v | 1548 V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | 1549 V_FW_IQ_CMD_VIID(pi->viid) | 1550 V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT)); 1551 c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) | 1552 F_FW_IQ_CMD_IQGTSMODE | 1553 V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) | 1554 V_FW_IQ_CMD_IQESIZE(ilog2(iq->esize) - 4)); 1555 c.iqsize = htobe16(iq->qsize); 1556 c.iqaddr = htobe64(iq->ba); 1557 if (cong >= 0) 1558 c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN); 1559 1560 if (fl) { 1561 mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF); 1562 1563 for (i = 0; i < FL_BUF_SIZES; i++) { 1564 1565 /* 1566 * A freelist buffer must be 16 byte aligned as the SGE 1567 * uses the low 4 bits of the bus addr to figure out the 1568 * buffer size. 
1569 */ 1570 rc = bus_dma_tag_create(sc->dmat, 16, 0, 1571 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, 1572 FL_BUF_SIZE(i), 1, FL_BUF_SIZE(i), BUS_DMA_ALLOCNOW, 1573 NULL, NULL, &fl->tag[i]); 1574 if (rc != 0) { 1575 device_printf(sc->dev, 1576 "failed to create fl DMA tag[%d]: %d\n", 1577 i, rc); 1578 return (rc); 1579 } 1580 } 1581 len = fl->qsize * RX_FL_ESIZE; 1582 rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map, 1583 &fl->ba, (void **)&fl->desc); 1584 if (rc) 1585 return (rc); 1586 1587 /* Allocate space for one software descriptor per buffer. */ 1588 fl->cap = (fl->qsize - spg_len / RX_FL_ESIZE) * 8; 1589 FL_LOCK(fl); 1590 rc = alloc_fl_sdesc(fl); 1591 FL_UNLOCK(fl); 1592 if (rc != 0) { 1593 device_printf(sc->dev, 1594 "failed to setup fl software descriptors: %d\n", 1595 rc); 1596 return (rc); 1597 } 1598 fl->needed = fl->cap; 1599 fl->lowat = roundup(sc->sge.fl_starve_threshold, 8); 1600 1601 c.iqns_to_fl0congen |= 1602 htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) | 1603 F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO | 1604 F_FW_IQ_CMD_FL0PADEN); 1605 if (cong >= 0) { 1606 c.iqns_to_fl0congen |= 1607 htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) | 1608 F_FW_IQ_CMD_FL0CONGCIF | 1609 F_FW_IQ_CMD_FL0CONGEN); 1610 } 1611 c.fl0dcaen_to_fl0cidxfthresh = 1612 htobe16(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_64B) | 1613 V_FW_IQ_CMD_FL0FBMAX(X_FETCHBURSTMAX_512B)); 1614 c.fl0size = htobe16(fl->qsize); 1615 c.fl0addr = htobe64(fl->ba); 1616 } 1617 1618 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 1619 if (rc != 0) { 1620 device_printf(sc->dev, 1621 "failed to create ingress queue: %d\n", rc); 1622 return (rc); 1623 } 1624 1625 iq->cdesc = iq->desc; 1626 iq->cidx = 0; 1627 iq->gen = 1; 1628 iq->intr_next = iq->intr_params; 1629 iq->cntxt_id = be16toh(c.iqid); 1630 iq->abs_id = be16toh(c.physiqid); 1631 iq->flags |= IQ_ALLOCATED; 1632 1633 cntxt_id = iq->cntxt_id - sc->sge.iq_start; 1634 if (cntxt_id >= sc->sge.niq) { 1635 panic ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__, 1636 cntxt_id, sc->sge.niq - 1); 1637 } 1638 sc->sge.iqmap[cntxt_id] = iq; 1639 1640 if (fl) { 1641 fl->cntxt_id = be16toh(c.fl0id); 1642 fl->pidx = fl->cidx = 0; 1643 1644 cntxt_id = fl->cntxt_id - sc->sge.eq_start; 1645 if (cntxt_id >= sc->sge.neq) { 1646 panic("%s: fl->cntxt_id (%d) more than the max (%d)", 1647 __func__, cntxt_id, sc->sge.neq - 1); 1648 } 1649 sc->sge.eqmap[cntxt_id] = (void *)fl; 1650 1651 FL_LOCK(fl); 1652 /* Enough to make sure the SGE doesn't think it's starved */ 1653 refill_fl(sc, fl, fl->lowat); 1654 FL_UNLOCK(fl); 1655 1656 iq->flags |= IQ_HAS_FL; 1657 } 1658 1659 /* Enable IQ interrupts */ 1660 atomic_store_rel_int(&iq->state, IQS_IDLE); 1661 t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_SEINTARM(iq->intr_params) | 1662 V_INGRESSQID(iq->cntxt_id)); 1663 1664 return (0); 1665} 1666 1667static int 1668free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl) 1669{ 1670 int i, rc; 1671 struct adapter *sc = iq->adapter; 1672 device_t dev; 1673 1674 if (sc == NULL) 1675 return (0); /* nothing to do */ 1676 1677 dev = pi ? pi->dev : sc->dev; 1678 1679 if (iq->flags & IQ_ALLOCATED) { 1680 rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, 1681 FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id, 1682 fl ? 
fl->cntxt_id : 0xffff, 0xffff); 1683 if (rc != 0) { 1684 device_printf(dev, 1685 "failed to free queue %p: %d\n", iq, rc); 1686 return (rc); 1687 } 1688 iq->flags &= ~IQ_ALLOCATED; 1689 } 1690 1691 free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc); 1692 1693 bzero(iq, sizeof(*iq)); 1694 1695 if (fl) { 1696 free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba, 1697 fl->desc); 1698 1699 if (fl->sdesc) { 1700 FL_LOCK(fl); 1701 free_fl_sdesc(fl); 1702 FL_UNLOCK(fl); 1703 } 1704 1705 if (mtx_initialized(&fl->fl_lock)) 1706 mtx_destroy(&fl->fl_lock); 1707 1708 for (i = 0; i < FL_BUF_SIZES; i++) { 1709 if (fl->tag[i]) 1710 bus_dma_tag_destroy(fl->tag[i]); 1711 } 1712 1713 bzero(fl, sizeof(*fl)); 1714 } 1715 1716 return (0); 1717} 1718 1719static int 1720alloc_fwq(struct adapter *sc) 1721{ 1722 int rc, intr_idx; 1723 struct sge_iq *fwq = &sc->sge.fwq; 1724 char name[16]; 1725 struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev); 1726 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 1727 1728 snprintf(name, sizeof(name), "%s fwq", device_get_nameunit(sc->dev)); 1729 init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE, name); 1730 fwq->flags |= IQ_INTR; /* always */ 1731 intr_idx = sc->intr_count > 1 ? 1 : 0; 1732 rc = alloc_iq_fl(sc->port[0], fwq, NULL, intr_idx, -1); 1733 if (rc != 0) { 1734 device_printf(sc->dev, 1735 "failed to create firmware event queue: %d\n", rc); 1736 return (rc); 1737 } 1738 1739 oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "fwq", CTLFLAG_RD, 1740 NULL, "firmware event queue"); 1741 children = SYSCTL_CHILDREN(oid); 1742 1743 SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "abs_id", 1744 CTLTYPE_INT | CTLFLAG_RD, &fwq->abs_id, 0, sysctl_uint16, "I", 1745 "absolute id of the queue"); 1746 SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cntxt_id", 1747 CTLTYPE_INT | CTLFLAG_RD, &fwq->cntxt_id, 0, sysctl_uint16, "I", 1748 "SGE context id of the queue"); 1749 SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx", 1750 CTLTYPE_INT | CTLFLAG_RD, &fwq->cidx, 0, sysctl_uint16, "I", 1751 "consumer index"); 1752 1753 return (0); 1754} 1755 1756static int 1757free_fwq(struct adapter *sc) 1758{ 1759 return free_iq_fl(NULL, &sc->sge.fwq, NULL); 1760} 1761 1762static int 1763alloc_mgmtq(struct adapter *sc) 1764{ 1765 int rc; 1766 struct sge_wrq *mgmtq = &sc->sge.mgmtq; 1767 char name[16]; 1768 struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev); 1769 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 1770 1771 oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "mgmtq", CTLFLAG_RD, 1772 NULL, "management queue"); 1773 1774 snprintf(name, sizeof(name), "%s mgmtq", device_get_nameunit(sc->dev)); 1775 init_eq(&mgmtq->eq, EQ_CTRL, CTRL_EQ_QSIZE, sc->port[0]->tx_chan, 1776 sc->sge.fwq.cntxt_id, name); 1777 rc = alloc_wrq(sc, NULL, mgmtq, oid); 1778 if (rc != 0) { 1779 device_printf(sc->dev, 1780 "failed to create management queue: %d\n", rc); 1781 return (rc); 1782 } 1783 1784 return (0); 1785} 1786 1787static int 1788free_mgmtq(struct adapter *sc) 1789{ 1790 1791 return free_wrq(sc, &sc->sge.mgmtq); 1792} 1793 1794static int 1795alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx, 1796 struct sysctl_oid *oid) 1797{ 1798 int rc; 1799 struct sysctl_oid_list *children; 1800 char name[16]; 1801 1802 rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx, 1 << pi->tx_chan); 1803 if (rc != 0) 1804 return (rc); 1805 1806 FL_LOCK(&rxq->fl); 1807 refill_fl(pi->adapter, &rxq->fl, rxq->fl.needed / 8); 1808 FL_UNLOCK(&rxq->fl); 1809 1810#if defined(INET) || 
defined(INET6) 1811 rc = tcp_lro_init(&rxq->lro); 1812 if (rc != 0) 1813 return (rc); 1814 rxq->lro.ifp = pi->ifp; /* also indicates LRO init'ed */ 1815 1816 if (pi->ifp->if_capenable & IFCAP_LRO) 1817 rxq->iq.flags |= IQ_LRO_ENABLED; 1818#endif 1819 rxq->ifp = pi->ifp; 1820 1821 children = SYSCTL_CHILDREN(oid); 1822 1823 snprintf(name, sizeof(name), "%d", idx); 1824 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD, 1825 NULL, "rx queue"); 1826 children = SYSCTL_CHILDREN(oid); 1827 1828 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id", 1829 CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.abs_id, 0, sysctl_uint16, "I", 1830 "absolute id of the queue"); 1831 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id", 1832 CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cntxt_id, 0, sysctl_uint16, "I", 1833 "SGE context id of the queue"); 1834 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx", 1835 CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cidx, 0, sysctl_uint16, "I", 1836 "consumer index"); 1837#if defined(INET) || defined(INET6) 1838 SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD, 1839 &rxq->lro.lro_queued, 0, NULL); 1840 SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD, 1841 &rxq->lro.lro_flushed, 0, NULL); 1842#endif 1843 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD, 1844 &rxq->rxcsum, "# of times hardware assisted with checksum"); 1845 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_extraction", 1846 CTLFLAG_RD, &rxq->vlan_extraction, 1847 "# of times hardware extracted 802.1Q tag"); 1848 1849 children = SYSCTL_CHILDREN(oid); 1850 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "fl", CTLFLAG_RD, 1851 NULL, "freelist"); 1852 children = SYSCTL_CHILDREN(oid); 1853 1854 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id", 1855 CTLTYPE_INT | CTLFLAG_RD, &rxq->fl.cntxt_id, 0, sysctl_uint16, "I", 1856 "SGE context id of the queue"); 1857 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, 1858 &rxq->fl.cidx, 0, "consumer index"); 1859 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, 1860 &rxq->fl.pidx, 0, "producer index"); 1861 1862 return (rc); 1863} 1864 1865static int 1866free_rxq(struct port_info *pi, struct sge_rxq *rxq) 1867{ 1868 int rc; 1869 1870#if defined(INET) || defined(INET6) 1871 if (rxq->lro.ifp) { 1872 tcp_lro_free(&rxq->lro); 1873 rxq->lro.ifp = NULL; 1874 } 1875#endif 1876 1877 rc = free_iq_fl(pi, &rxq->iq, &rxq->fl); 1878 if (rc == 0) 1879 bzero(rxq, sizeof(*rxq)); 1880 1881 return (rc); 1882} 1883 1884#ifdef TCP_OFFLOAD 1885static int 1886alloc_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq, 1887 int intr_idx, int idx, struct sysctl_oid *oid) 1888{ 1889 int rc; 1890 struct sysctl_oid_list *children; 1891 char name[16]; 1892 1893 rc = alloc_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl, intr_idx, 1894 1 << pi->tx_chan); 1895 if (rc != 0) 1896 return (rc); 1897 1898 children = SYSCTL_CHILDREN(oid); 1899 1900 snprintf(name, sizeof(name), "%d", idx); 1901 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD, 1902 NULL, "rx queue"); 1903 children = SYSCTL_CHILDREN(oid); 1904 1905 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id", 1906 CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.abs_id, 0, sysctl_uint16, 1907 "I", "absolute id of the queue"); 1908 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id", 1909 CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cntxt_id, 0, sysctl_uint16, 1910 "I", "SGE context id of the queue"); 1911 SYSCTL_ADD_PROC(&pi->ctx, 
children, OID_AUTO, "cidx", 1912 CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cidx, 0, sysctl_uint16, "I", 1913 "consumer index"); 1914 1915 children = SYSCTL_CHILDREN(oid); 1916 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "fl", CTLFLAG_RD, 1917 NULL, "freelist"); 1918 children = SYSCTL_CHILDREN(oid); 1919 1920 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id", 1921 CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->fl.cntxt_id, 0, sysctl_uint16, 1922 "I", "SGE context id of the queue"); 1923 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, 1924 &ofld_rxq->fl.cidx, 0, "consumer index"); 1925 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, 1926 &ofld_rxq->fl.pidx, 0, "producer index"); 1927 1928 return (rc); 1929} 1930 1931static int 1932free_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq) 1933{ 1934 int rc; 1935 1936 rc = free_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl); 1937 if (rc == 0) 1938 bzero(ofld_rxq, sizeof(*ofld_rxq)); 1939 1940 return (rc); 1941} 1942#endif 1943 1944static int 1945ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq) 1946{ 1947 int rc, cntxt_id; 1948 struct fw_eq_ctrl_cmd c; 1949 1950 bzero(&c, sizeof(c)); 1951 1952 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST | 1953 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) | 1954 V_FW_EQ_CTRL_CMD_VFN(0)); 1955 c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC | 1956 F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c)); 1957 c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); /* XXX */ 1958 c.physeqid_pkd = htobe32(0); 1959 c.fetchszm_to_iqid = 1960 htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | 1961 V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) | 1962 F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid)); 1963 c.dcaen_to_eqsize = 1964 htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) | 1965 V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 1966 V_FW_EQ_CTRL_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) | 1967 V_FW_EQ_CTRL_CMD_EQSIZE(eq->qsize)); 1968 c.eqaddr = htobe64(eq->ba); 1969 1970 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 1971 if (rc != 0) { 1972 device_printf(sc->dev, 1973 "failed to create control queue %d: %d\n", eq->tx_chan, rc); 1974 return (rc); 1975 } 1976 eq->flags |= EQ_ALLOCATED; 1977 1978 eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid)); 1979 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 1980 if (cntxt_id >= sc->sge.neq) 1981 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 1982 cntxt_id, sc->sge.neq - 1); 1983 sc->sge.eqmap[cntxt_id] = eq; 1984 1985 return (rc); 1986} 1987 1988static int 1989eth_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq) 1990{ 1991 int rc, cntxt_id; 1992 struct fw_eq_eth_cmd c; 1993 1994 bzero(&c, sizeof(c)); 1995 1996 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST | 1997 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) | 1998 V_FW_EQ_ETH_CMD_VFN(0)); 1999 c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC | 2000 F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); 2001 c.viid_pkd = htobe32(V_FW_EQ_ETH_CMD_VIID(pi->viid)); 2002 c.fetchszm_to_iqid = 2003 htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | 2004 V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO | 2005 V_FW_EQ_ETH_CMD_IQID(eq->iqid)); 2006 c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) | 2007 V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 2008 V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) | 2009 
V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize)); 2010 c.eqaddr = htobe64(eq->ba); 2011 2012 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 2013 if (rc != 0) { 2014 device_printf(pi->dev, 2015 "failed to create Ethernet egress queue: %d\n", rc); 2016 return (rc); 2017 } 2018 eq->flags |= EQ_ALLOCATED; 2019 2020 eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd)); 2021 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 2022 if (cntxt_id >= sc->sge.neq) 2023 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 2024 cntxt_id, sc->sge.neq - 1); 2025 sc->sge.eqmap[cntxt_id] = eq; 2026 2027 return (rc); 2028} 2029 2030#ifdef TCP_OFFLOAD 2031static int 2032ofld_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq) 2033{ 2034 int rc, cntxt_id; 2035 struct fw_eq_ofld_cmd c; 2036 2037 bzero(&c, sizeof(c)); 2038 2039 c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST | 2040 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) | 2041 V_FW_EQ_OFLD_CMD_VFN(0)); 2042 c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC | 2043 F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c)); 2044 c.fetchszm_to_iqid = 2045 htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | 2046 V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) | 2047 F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid)); 2048 c.dcaen_to_eqsize = 2049 htobe32(V_FW_EQ_OFLD_CMD_FBMIN(X_FETCHBURSTMIN_64B) | 2050 V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 2051 V_FW_EQ_OFLD_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) | 2052 V_FW_EQ_OFLD_CMD_EQSIZE(eq->qsize)); 2053 c.eqaddr = htobe64(eq->ba); 2054 2055 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 2056 if (rc != 0) { 2057 device_printf(pi->dev, 2058 "failed to create egress queue for TCP offload: %d\n", rc); 2059 return (rc); 2060 } 2061 eq->flags |= EQ_ALLOCATED; 2062 2063 eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd)); 2064 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 2065 if (cntxt_id >= sc->sge.neq) 2066 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 2067 cntxt_id, sc->sge.neq - 1); 2068 sc->sge.eqmap[cntxt_id] = eq; 2069 2070 return (rc); 2071} 2072#endif 2073 2074static int 2075alloc_eq(struct adapter *sc, struct port_info *pi, struct sge_eq *eq) 2076{ 2077 int rc; 2078 size_t len; 2079 2080 mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF); 2081 2082 len = eq->qsize * EQ_ESIZE; 2083 rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map, 2084 &eq->ba, (void **)&eq->desc); 2085 if (rc) 2086 return (rc); 2087 2088 eq->cap = eq->qsize - spg_len / EQ_ESIZE; 2089 eq->spg = (void *)&eq->desc[eq->cap]; 2090 eq->avail = eq->cap - 1; /* one less to avoid cidx = pidx */ 2091 eq->pidx = eq->cidx = 0; 2092 2093 switch (eq->flags & EQ_TYPEMASK) { 2094 case EQ_CTRL: 2095 rc = ctrl_eq_alloc(sc, eq); 2096 break; 2097 2098 case EQ_ETH: 2099 rc = eth_eq_alloc(sc, pi, eq); 2100 break; 2101 2102#ifdef TCP_OFFLOAD 2103 case EQ_OFLD: 2104 rc = ofld_eq_alloc(sc, pi, eq); 2105 break; 2106#endif 2107 2108 default: 2109 panic("%s: invalid eq type %d.", __func__, 2110 eq->flags & EQ_TYPEMASK); 2111 } 2112 if (rc != 0) { 2113 device_printf(sc->dev, 2114 "failed to allocate egress queue(%d): %d", 2115 eq->flags & EQ_TYPEMASK, rc); 2116 } 2117 2118 eq->tx_callout.c_cpu = eq->cntxt_id % mp_ncpus; 2119 2120 return (rc); 2121} 2122 2123static int 2124free_eq(struct adapter *sc, struct sge_eq *eq) 2125{ 2126 int rc; 2127 2128 if (eq->flags & EQ_ALLOCATED) { 2129 switch (eq->flags & EQ_TYPEMASK) { 2130 case EQ_CTRL: 2131 rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0, 2132 
eq->cntxt_id); 2133 break; 2134 2135 case EQ_ETH: 2136 rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, 2137 eq->cntxt_id); 2138 break; 2139 2140#ifdef TCP_OFFLOAD 2141 case EQ_OFLD: 2142 rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0, 2143 eq->cntxt_id); 2144 break; 2145#endif 2146 2147 default: 2148 panic("%s: invalid eq type %d.", __func__, 2149 eq->flags & EQ_TYPEMASK); 2150 } 2151 if (rc != 0) { 2152 device_printf(sc->dev, 2153 "failed to free egress queue (%d): %d\n", 2154 eq->flags & EQ_TYPEMASK, rc); 2155 return (rc); 2156 } 2157 eq->flags &= ~EQ_ALLOCATED; 2158 } 2159 2160 free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc); 2161 2162 if (mtx_initialized(&eq->eq_lock)) 2163 mtx_destroy(&eq->eq_lock); 2164 2165 bzero(eq, sizeof(*eq)); 2166 return (0); 2167} 2168 2169static int 2170alloc_wrq(struct adapter *sc, struct port_info *pi, struct sge_wrq *wrq, 2171 struct sysctl_oid *oid) 2172{ 2173 int rc; 2174 struct sysctl_ctx_list *ctx = pi ? &pi->ctx : &sc->ctx; 2175 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 2176 2177 rc = alloc_eq(sc, pi, &wrq->eq); 2178 if (rc) 2179 return (rc); 2180 2181 wrq->adapter = sc; 2182 STAILQ_INIT(&wrq->wr_list); 2183 2184 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 2185 &wrq->eq.cntxt_id, 0, "SGE context id of the queue"); 2186 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", 2187 CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.cidx, 0, sysctl_uint16, "I", 2188 "consumer index"); 2189 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pidx", 2190 CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.pidx, 0, sysctl_uint16, "I", 2191 "producer index"); 2192 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs", CTLFLAG_RD, 2193 &wrq->tx_wrs, "# of work requests"); 2194 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD, 2195 &wrq->no_desc, 0, 2196 "# of times queue ran out of hardware descriptors"); 2197 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD, 2198 &wrq->eq.unstalled, 0, "# of times queue recovered after stall"); 2199 2200 2201 return (rc); 2202} 2203 2204static int 2205free_wrq(struct adapter *sc, struct sge_wrq *wrq) 2206{ 2207 int rc; 2208 2209 rc = free_eq(sc, &wrq->eq); 2210 if (rc) 2211 return (rc); 2212 2213 bzero(wrq, sizeof(*wrq)); 2214 return (0); 2215} 2216 2217static int 2218alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx, 2219 struct sysctl_oid *oid) 2220{ 2221 int rc; 2222 struct adapter *sc = pi->adapter; 2223 struct sge_eq *eq = &txq->eq; 2224 char name[16]; 2225 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 2226 2227 rc = alloc_eq(sc, pi, eq); 2228 if (rc) 2229 return (rc); 2230 2231 txq->ifp = pi->ifp; 2232 2233 txq->sdesc = malloc(eq->cap * sizeof(struct tx_sdesc), M_CXGBE, 2234 M_ZERO | M_WAITOK); 2235 txq->br = buf_ring_alloc(eq->qsize, M_CXGBE, M_WAITOK, &eq->eq_lock); 2236 2237 rc = bus_dma_tag_create(sc->dmat, 1, 0, BUS_SPACE_MAXADDR, 2238 BUS_SPACE_MAXADDR, NULL, NULL, 64 * 1024, TX_SGL_SEGS, 2239 BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &txq->tx_tag); 2240 if (rc != 0) { 2241 device_printf(sc->dev, 2242 "failed to create tx DMA tag: %d\n", rc); 2243 return (rc); 2244 } 2245 2246 /* 2247 * We can stuff ~10 frames in an 8-descriptor txpkts WR (8 is the SGE 2248 * limit for any WR). txq->no_dmamap events shouldn't occur if maps is 2249 * sized for the worst case. 
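 * As a purely illustrative example (the qsize value here is not taken from
 * any real configuration): with eq->qsize = 1024 the call below sizes the
 * map pool at 1024 * 10 / 8 = 1280 maps, one per frame that could
 * conceivably be outstanding on the ring at once.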
2250 */ 2251 rc = t4_alloc_tx_maps(&txq->txmaps, txq->tx_tag, eq->qsize * 10 / 8, 2252 M_WAITOK); 2253 if (rc != 0) { 2254 device_printf(sc->dev, "failed to setup tx DMA maps: %d\n", rc); 2255 return (rc); 2256 } 2257 2258 snprintf(name, sizeof(name), "%d", idx); 2259 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD, 2260 NULL, "tx queue"); 2261 children = SYSCTL_CHILDREN(oid); 2262 2263 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 2264 &eq->cntxt_id, 0, "SGE context id of the queue"); 2265 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx", 2266 CTLTYPE_INT | CTLFLAG_RD, &eq->cidx, 0, sysctl_uint16, "I", 2267 "consumer index"); 2268 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "pidx", 2269 CTLTYPE_INT | CTLFLAG_RD, &eq->pidx, 0, sysctl_uint16, "I", 2270 "producer index"); 2271 2272 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD, 2273 &txq->txcsum, "# of times hardware assisted with checksum"); 2274 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_insertion", 2275 CTLFLAG_RD, &txq->vlan_insertion, 2276 "# of times hardware inserted 802.1Q tag"); 2277 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD, 2278 &txq->tso_wrs, "# of TSO work requests"); 2279 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD, 2280 &txq->imm_wrs, "# of work requests with immediate data"); 2281 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD, 2282 &txq->sgl_wrs, "# of work requests with direct SGL"); 2283 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD, 2284 &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)"); 2285 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_wrs", CTLFLAG_RD, 2286 &txq->txpkts_wrs, "# of txpkts work requests (multiple pkts/WR)"); 2287 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_pkts", CTLFLAG_RD, 2288 &txq->txpkts_pkts, "# of frames tx'd using txpkts work requests"); 2289 2290 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_dmamap", CTLFLAG_RD, 2291 &txq->no_dmamap, 0, "# of times txq ran out of DMA maps"); 2292 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD, 2293 &txq->no_desc, 0, "# of times txq ran out of hardware descriptors"); 2294 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "egr_update", CTLFLAG_RD, 2295 &eq->egr_update, 0, "egress update notifications from the SGE"); 2296 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD, 2297 &eq->unstalled, 0, "# of times txq recovered after stall"); 2298 2299 return (rc); 2300} 2301 2302static int 2303free_txq(struct port_info *pi, struct sge_txq *txq) 2304{ 2305 int rc; 2306 struct adapter *sc = pi->adapter; 2307 struct sge_eq *eq = &txq->eq; 2308 2309 rc = free_eq(sc, eq); 2310 if (rc) 2311 return (rc); 2312 2313 free(txq->sdesc, M_CXGBE); 2314 2315 if (txq->txmaps.maps) 2316 t4_free_tx_maps(&txq->txmaps, txq->tx_tag); 2317 2318 buf_ring_free(txq->br, M_CXGBE); 2319 2320 if (txq->tx_tag) 2321 bus_dma_tag_destroy(txq->tx_tag); 2322 2323 bzero(txq, sizeof(*txq)); 2324 return (0); 2325} 2326 2327static void 2328oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error) 2329{ 2330 bus_addr_t *ba = arg; 2331 2332 KASSERT(nseg == 1, 2333 ("%s meant for single segment mappings only.", __func__)); 2334 2335 *ba = error ? 
0 : segs->ds_addr; 2336} 2337 2338static inline bool 2339is_new_response(const struct sge_iq *iq, struct rsp_ctrl **ctrl) 2340{ 2341 *ctrl = (void *)((uintptr_t)iq->cdesc + 2342 (iq->esize - sizeof(struct rsp_ctrl))); 2343 2344 return (((*ctrl)->u.type_gen >> S_RSPD_GEN) == iq->gen); 2345} 2346 2347static inline void 2348iq_next(struct sge_iq *iq) 2349{ 2350 iq->cdesc = (void *) ((uintptr_t)iq->cdesc + iq->esize); 2351 if (__predict_false(++iq->cidx == iq->qsize - 1)) { 2352 iq->cidx = 0; 2353 iq->gen ^= 1; 2354 iq->cdesc = iq->desc; 2355 } 2356} 2357 2358#define FL_HW_IDX(x) ((x) >> 3) 2359static inline void 2360ring_fl_db(struct adapter *sc, struct sge_fl *fl) 2361{ 2362 int ndesc = fl->pending / 8; 2363 2364 if (FL_HW_IDX(fl->pidx) == FL_HW_IDX(fl->cidx)) 2365 ndesc--; /* hold back one credit */ 2366 2367 if (ndesc <= 0) 2368 return; /* nothing to do */ 2369 2370 wmb(); 2371 2372 t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), F_DBPRIO | 2373 V_QID(fl->cntxt_id) | V_PIDX(ndesc)); 2374 fl->pending -= ndesc * 8; 2375} 2376 2377/* 2378 * Fill up the freelist by upto nbufs and maybe ring its doorbell. 2379 * 2380 * Returns non-zero to indicate that it should be added to the list of starving 2381 * freelists. 2382 */ 2383static int 2384refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs) 2385{ 2386 __be64 *d = &fl->desc[fl->pidx]; 2387 struct fl_sdesc *sd = &fl->sdesc[fl->pidx]; 2388 bus_dma_tag_t tag; 2389 bus_addr_t pa; 2390 caddr_t cl; 2391 int rc; 2392 2393 FL_LOCK_ASSERT_OWNED(fl); 2394 2395 if (nbufs > fl->needed) 2396 nbufs = fl->needed; 2397 2398 while (nbufs--) { 2399 2400 if (sd->cl != NULL) { 2401 2402 /* 2403 * This happens when a frame small enough to fit 2404 * entirely in an mbuf was received in cl last time. 2405 * We'd held on to cl and can reuse it now. Note that 2406 * we reuse a cluster of the old size if fl->tag_idx is 2407 * no longer the same as sd->tag_idx. 2408 */ 2409 2410 KASSERT(*d == sd->ba_tag, 2411 ("%s: recyling problem at pidx %d", 2412 __func__, fl->pidx)); 2413 2414 d++; 2415 goto recycled; 2416 } 2417 2418 2419 if (fl->tag_idx != sd->tag_idx) { 2420 bus_dmamap_t map; 2421 bus_dma_tag_t newtag = fl->tag[fl->tag_idx]; 2422 bus_dma_tag_t oldtag = fl->tag[sd->tag_idx]; 2423 2424 /* 2425 * An MTU change can get us here. Discard the old map 2426 * which was created with the old tag, but only if 2427 * we're able to get a new one. 
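 * If creating the new map fails we leave sd->tag_idx untouched, so this
 * entry simply keeps its old map and keeps allocating clusters of the old
 * size until a later refill manages to switch it over.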
2428 */ 2429 rc = bus_dmamap_create(newtag, 0, &map); 2430 if (rc == 0) { 2431 bus_dmamap_destroy(oldtag, sd->map); 2432 sd->map = map; 2433 sd->tag_idx = fl->tag_idx; 2434 } 2435 } 2436 2437 tag = fl->tag[sd->tag_idx]; 2438 2439 cl = m_cljget(NULL, M_NOWAIT, FL_BUF_SIZE(sd->tag_idx)); 2440 if (cl == NULL) 2441 break; 2442 2443 rc = bus_dmamap_load(tag, sd->map, cl, FL_BUF_SIZE(sd->tag_idx), 2444 oneseg_dma_callback, &pa, 0); 2445 if (rc != 0 || pa == 0) { 2446 fl->dmamap_failed++; 2447 uma_zfree(FL_BUF_ZONE(sd->tag_idx), cl); 2448 break; 2449 } 2450 2451 sd->cl = cl; 2452 *d++ = htobe64(pa | sd->tag_idx); 2453 2454#ifdef INVARIANTS 2455 sd->ba_tag = htobe64(pa | sd->tag_idx); 2456#endif 2457 2458recycled: 2459 /* sd->m is never recycled, should always be NULL */ 2460 KASSERT(sd->m == NULL, ("%s: stray mbuf", __func__)); 2461 2462 sd->m = m_gethdr(M_NOWAIT, MT_NOINIT); 2463 if (sd->m == NULL) 2464 break; 2465 2466 fl->pending++; 2467 fl->needed--; 2468 sd++; 2469 if (++fl->pidx == fl->cap) { 2470 fl->pidx = 0; 2471 sd = fl->sdesc; 2472 d = fl->desc; 2473 } 2474 } 2475 2476 if (fl->pending >= 8) 2477 ring_fl_db(sc, fl); 2478 2479 return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING)); 2480} 2481 2482/* 2483 * Attempt to refill all starving freelists. 2484 */ 2485static void 2486refill_sfl(void *arg) 2487{ 2488 struct adapter *sc = arg; 2489 struct sge_fl *fl, *fl_temp; 2490 2491 mtx_lock(&sc->sfl_lock); 2492 TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) { 2493 FL_LOCK(fl); 2494 refill_fl(sc, fl, 64); 2495 if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) { 2496 TAILQ_REMOVE(&sc->sfl, fl, link); 2497 fl->flags &= ~FL_STARVING; 2498 } 2499 FL_UNLOCK(fl); 2500 } 2501 2502 if (!TAILQ_EMPTY(&sc->sfl)) 2503 callout_schedule(&sc->sfl_callout, hz / 5); 2504 mtx_unlock(&sc->sfl_lock); 2505} 2506 2507static int 2508alloc_fl_sdesc(struct sge_fl *fl) 2509{ 2510 struct fl_sdesc *sd; 2511 bus_dma_tag_t tag; 2512 int i, rc; 2513 2514 FL_LOCK_ASSERT_OWNED(fl); 2515 2516 fl->sdesc = malloc(fl->cap * sizeof(struct fl_sdesc), M_CXGBE, 2517 M_ZERO | M_WAITOK); 2518 2519 tag = fl->tag[fl->tag_idx]; 2520 sd = fl->sdesc; 2521 for (i = 0; i < fl->cap; i++, sd++) { 2522 2523 sd->tag_idx = fl->tag_idx; 2524 rc = bus_dmamap_create(tag, 0, &sd->map); 2525 if (rc != 0) 2526 goto failed; 2527 } 2528 2529 return (0); 2530failed: 2531 while (--i >= 0) { 2532 sd--; 2533 bus_dmamap_destroy(tag, sd->map); 2534 if (sd->m) { 2535 m_init(sd->m, NULL, 0, M_NOWAIT, MT_DATA, 0); 2536 m_free(sd->m); 2537 sd->m = NULL; 2538 } 2539 } 2540 KASSERT(sd == fl->sdesc, ("%s: EDOOFUS", __func__)); 2541 2542 free(fl->sdesc, M_CXGBE); 2543 fl->sdesc = NULL; 2544 2545 return (rc); 2546} 2547 2548static void 2549free_fl_sdesc(struct sge_fl *fl) 2550{ 2551 struct fl_sdesc *sd; 2552 int i; 2553 2554 FL_LOCK_ASSERT_OWNED(fl); 2555 2556 sd = fl->sdesc; 2557 for (i = 0; i < fl->cap; i++, sd++) { 2558 2559 if (sd->m) { 2560 m_init(sd->m, NULL, 0, M_NOWAIT, MT_DATA, 0); 2561 m_free(sd->m); 2562 sd->m = NULL; 2563 } 2564 2565 if (sd->cl) { 2566 bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map); 2567 uma_zfree(FL_BUF_ZONE(sd->tag_idx), sd->cl); 2568 sd->cl = NULL; 2569 } 2570 2571 bus_dmamap_destroy(fl->tag[sd->tag_idx], sd->map); 2572 } 2573 2574 free(fl->sdesc, M_CXGBE); 2575 fl->sdesc = NULL; 2576} 2577 2578int 2579t4_alloc_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag, int count, 2580 int flags) 2581{ 2582 struct tx_map *txm; 2583 int i, rc; 2584 2585 txmaps->map_total = txmaps->map_avail = count; 2586 txmaps->map_cidx = 
txmaps->map_pidx = 0; 2587 2588 txmaps->maps = malloc(count * sizeof(struct tx_map), M_CXGBE, 2589 M_ZERO | flags); 2590 2591 txm = txmaps->maps; 2592 for (i = 0; i < count; i++, txm++) { 2593 rc = bus_dmamap_create(tx_tag, 0, &txm->map); 2594 if (rc != 0) 2595 goto failed; 2596 } 2597 2598 return (0); 2599failed: 2600 while (--i >= 0) { 2601 txm--; 2602 bus_dmamap_destroy(tx_tag, txm->map); 2603 } 2604 KASSERT(txm == txmaps->maps, ("%s: EDOOFUS", __func__)); 2605 2606 free(txmaps->maps, M_CXGBE); 2607 txmaps->maps = NULL; 2608 2609 return (rc); 2610} 2611 2612void 2613t4_free_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag) 2614{ 2615 struct tx_map *txm; 2616 int i; 2617 2618 txm = txmaps->maps; 2619 for (i = 0; i < txmaps->map_total; i++, txm++) { 2620 2621 if (txm->m) { 2622 bus_dmamap_unload(tx_tag, txm->map); 2623 m_freem(txm->m); 2624 txm->m = NULL; 2625 } 2626 2627 bus_dmamap_destroy(tx_tag, txm->map); 2628 } 2629 2630 free(txmaps->maps, M_CXGBE); 2631 txmaps->maps = NULL; 2632} 2633 2634/* 2635 * We'll do immediate data tx for non-TSO, but only when not coalescing. We're 2636 * willing to use upto 2 hardware descriptors which means a maximum of 96 bytes 2637 * of immediate data. 2638 */ 2639#define IMM_LEN ( \ 2640 2 * EQ_ESIZE \ 2641 - sizeof(struct fw_eth_tx_pkt_wr) \ 2642 - sizeof(struct cpl_tx_pkt_core)) 2643 2644/* 2645 * Returns non-zero on failure, no need to cleanup anything in that case. 2646 * 2647 * Note 1: We always try to defrag the mbuf if required and return EFBIG only 2648 * if the resulting chain still won't fit in a tx descriptor. 2649 * 2650 * Note 2: We'll pullup the mbuf chain if TSO is requested and the first mbuf 2651 * does not have the TCP header in it. 2652 */ 2653static int 2654get_pkt_sgl(struct sge_txq *txq, struct mbuf **fp, struct sgl *sgl, 2655 int sgl_only) 2656{ 2657 struct mbuf *m = *fp; 2658 struct tx_maps *txmaps; 2659 struct tx_map *txm; 2660 int rc, defragged = 0, n; 2661 2662 TXQ_LOCK_ASSERT_OWNED(txq); 2663 2664 if (m->m_pkthdr.tso_segsz) 2665 sgl_only = 1; /* Do not allow immediate data with LSO */ 2666 2667start: sgl->nsegs = 0; 2668 2669 if (m->m_pkthdr.len <= IMM_LEN && !sgl_only) 2670 return (0); /* nsegs = 0 tells caller to use imm. tx */ 2671 2672 txmaps = &txq->txmaps; 2673 if (txmaps->map_avail == 0) { 2674 txq->no_dmamap++; 2675 return (ENOMEM); 2676 } 2677 txm = &txmaps->maps[txmaps->map_pidx]; 2678 2679 if (m->m_pkthdr.tso_segsz && m->m_len < 50) { 2680 *fp = m_pullup(m, 50); 2681 m = *fp; 2682 if (m == NULL) 2683 return (ENOBUFS); 2684 } 2685 2686 rc = bus_dmamap_load_mbuf_sg(txq->tx_tag, txm->map, m, sgl->seg, 2687 &sgl->nsegs, BUS_DMA_NOWAIT); 2688 if (rc == EFBIG && defragged == 0) { 2689 m = m_defrag(m, M_DONTWAIT); 2690 if (m == NULL) 2691 return (EFBIG); 2692 2693 defragged = 1; 2694 *fp = m; 2695 goto start; 2696 } 2697 if (rc != 0) 2698 return (rc); 2699 2700 txm->m = m; 2701 txmaps->map_avail--; 2702 if (++txmaps->map_pidx == txmaps->map_total) 2703 txmaps->map_pidx = 0; 2704 2705 KASSERT(sgl->nsegs > 0 && sgl->nsegs <= TX_SGL_SEGS, 2706 ("%s: bad DMA mapping (%d segments)", __func__, sgl->nsegs)); 2707 2708 /* 2709 * Store the # of flits required to hold this frame's SGL in nflits. An 2710 * SGL has a (ULPTX header + len0, addr0) tuple optionally followed by 2711 * multiple (len0 + len1, addr0, addr1) tuples. If addr1 is not used 2712 * then len1 must be set to 0. 
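 * As a worked example (illustrative only): a frame that maps to 4 DMA
 * segments gives n = 3 below, so nflits = (3 * 3) / 2 + (3 & 1) + 2 = 7,
 * i.e. two flits for the leading (header + len0, addr0) tuple, three for
 * the first full pair of segments, and two for the trailing unpaired one.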
2713 */ 2714 n = sgl->nsegs - 1; 2715 sgl->nflits = (3 * n) / 2 + (n & 1) + 2; 2716 2717 return (0); 2718} 2719 2720 2721/* 2722 * Releases all the txq resources used up in the specified sgl. 2723 */ 2724static int 2725free_pkt_sgl(struct sge_txq *txq, struct sgl *sgl) 2726{ 2727 struct tx_maps *txmaps; 2728 struct tx_map *txm; 2729 2730 TXQ_LOCK_ASSERT_OWNED(txq); 2731 2732 if (sgl->nsegs == 0) 2733 return (0); /* didn't use any map */ 2734 2735 txmaps = &txq->txmaps; 2736 2737 /* 1 pkt uses exactly 1 map, back it out */ 2738 2739 txmaps->map_avail++; 2740 if (txmaps->map_pidx > 0) 2741 txmaps->map_pidx--; 2742 else 2743 txmaps->map_pidx = txmaps->map_total - 1; 2744 2745 txm = &txmaps->maps[txmaps->map_pidx]; 2746 bus_dmamap_unload(txq->tx_tag, txm->map); 2747 txm->m = NULL; 2748 2749 return (0); 2750} 2751 2752static int 2753write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, struct mbuf *m, 2754 struct sgl *sgl) 2755{ 2756 struct sge_eq *eq = &txq->eq; 2757 struct fw_eth_tx_pkt_wr *wr; 2758 struct cpl_tx_pkt_core *cpl; 2759 uint32_t ctrl; /* used in many unrelated places */ 2760 uint64_t ctrl1; 2761 int nflits, ndesc, pktlen; 2762 struct tx_sdesc *txsd; 2763 caddr_t dst; 2764 2765 TXQ_LOCK_ASSERT_OWNED(txq); 2766 2767 pktlen = m->m_pkthdr.len; 2768 2769 /* 2770 * Do we have enough flits to send this frame out? 2771 */ 2772 ctrl = sizeof(struct cpl_tx_pkt_core); 2773 if (m->m_pkthdr.tso_segsz) { 2774 nflits = TXPKT_LSO_WR_HDR; 2775 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 2776 } else 2777 nflits = TXPKT_WR_HDR; 2778 if (sgl->nsegs > 0) 2779 nflits += sgl->nflits; 2780 else { 2781 nflits += howmany(pktlen, 8); 2782 ctrl += pktlen; 2783 } 2784 ndesc = howmany(nflits, 8); 2785 if (ndesc > eq->avail) 2786 return (ENOMEM); 2787 2788 /* Firmware work request header */ 2789 wr = (void *)&eq->desc[eq->pidx]; 2790 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | 2791 V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); 2792 ctrl = V_FW_WR_LEN16(howmany(nflits, 2)); 2793 if (eq->avail == ndesc) { 2794 if (!(eq->flags & EQ_CRFLUSHED)) { 2795 ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ; 2796 eq->flags |= EQ_CRFLUSHED; 2797 } 2798 eq->flags |= EQ_STALLED; 2799 } 2800 2801 wr->equiq_to_len16 = htobe32(ctrl); 2802 wr->r3 = 0; 2803 2804 if (m->m_pkthdr.tso_segsz) { 2805 struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1); 2806 struct ether_header *eh; 2807 void *l3hdr; 2808#if defined(INET) || defined(INET6) 2809 struct tcphdr *tcp; 2810#endif 2811 uint16_t eh_type; 2812 2813 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE | 2814 F_LSO_LAST_SLICE; 2815 2816 eh = mtod(m, struct ether_header *); 2817 eh_type = ntohs(eh->ether_type); 2818 if (eh_type == ETHERTYPE_VLAN) { 2819 struct ether_vlan_header *evh = (void *)eh; 2820 2821 ctrl |= V_LSO_ETHHDR_LEN(1); 2822 l3hdr = evh + 1; 2823 eh_type = ntohs(evh->evl_proto); 2824 } else 2825 l3hdr = eh + 1; 2826 2827 switch (eh_type) { 2828#ifdef INET6 2829 case ETHERTYPE_IPV6: 2830 { 2831 struct ip6_hdr *ip6 = l3hdr; 2832 2833 /* 2834 * XXX-BZ For now we do not pretend to support 2835 * IPv6 extension headers. 
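 * The header lengths passed to the LSO control word below are 32-bit word
 * counts: the fixed 40-byte IPv6 header contributes 10 words, and
 * tcp->th_off is already a word count (5 for a TCP header without options).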
2836 */ 2837 KASSERT(ip6->ip6_nxt == IPPROTO_TCP, ("%s: CSUM_TSO " 2838 "with ip6_nxt != TCP: %u", __func__, ip6->ip6_nxt)); 2839 tcp = (struct tcphdr *)(ip6 + 1); 2840 ctrl |= F_LSO_IPV6; 2841 ctrl |= V_LSO_IPHDR_LEN(sizeof(*ip6) >> 2) | 2842 V_LSO_TCPHDR_LEN(tcp->th_off); 2843 break; 2844 } 2845#endif 2846#ifdef INET 2847 case ETHERTYPE_IP: 2848 { 2849 struct ip *ip = l3hdr; 2850 2851 tcp = (void *)((uintptr_t)ip + ip->ip_hl * 4); 2852 ctrl |= V_LSO_IPHDR_LEN(ip->ip_hl) | 2853 V_LSO_TCPHDR_LEN(tcp->th_off); 2854 break; 2855 } 2856#endif 2857 default: 2858 panic("%s: CSUM_TSO but no supported IP version " 2859 "(0x%04x)", __func__, eh_type); 2860 } 2861 2862 lso->lso_ctrl = htobe32(ctrl); 2863 lso->ipid_ofst = htobe16(0); 2864 lso->mss = htobe16(m->m_pkthdr.tso_segsz); 2865 lso->seqno_offset = htobe32(0); 2866 lso->len = htobe32(pktlen); 2867 2868 cpl = (void *)(lso + 1); 2869 2870 txq->tso_wrs++; 2871 } else 2872 cpl = (void *)(wr + 1); 2873 2874 /* Checksum offload */ 2875 ctrl1 = 0; 2876 if (!(m->m_pkthdr.csum_flags & CSUM_IP)) 2877 ctrl1 |= F_TXPKT_IPCSUM_DIS; 2878 if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | 2879 CSUM_TCP_IPV6))) 2880 ctrl1 |= F_TXPKT_L4CSUM_DIS; 2881 if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | 2882 CSUM_UDP_IPV6 | CSUM_TCP_IPV6)) 2883 txq->txcsum++; /* some hardware assistance provided */ 2884 2885 /* VLAN tag insertion */ 2886 if (m->m_flags & M_VLANTAG) { 2887 ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag); 2888 txq->vlan_insertion++; 2889 } 2890 2891 /* CPL header */ 2892 cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | 2893 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf)); 2894 cpl->pack = 0; 2895 cpl->len = htobe16(pktlen); 2896 cpl->ctrl1 = htobe64(ctrl1); 2897 2898 /* Software descriptor */ 2899 txsd = &txq->sdesc[eq->pidx]; 2900 txsd->desc_used = ndesc; 2901 2902 eq->pending += ndesc; 2903 eq->avail -= ndesc; 2904 eq->pidx += ndesc; 2905 if (eq->pidx >= eq->cap) 2906 eq->pidx -= eq->cap; 2907 2908 /* SGL */ 2909 dst = (void *)(cpl + 1); 2910 if (sgl->nsegs > 0) { 2911 txsd->credits = 1; 2912 txq->sgl_wrs++; 2913 write_sgl_to_txd(eq, sgl, &dst); 2914 } else { 2915 txsd->credits = 0; 2916 txq->imm_wrs++; 2917 for (; m; m = m->m_next) { 2918 copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len); 2919#ifdef INVARIANTS 2920 pktlen -= m->m_len; 2921#endif 2922 } 2923#ifdef INVARIANTS 2924 KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen)); 2925#endif 2926 2927 } 2928 2929 txq->txpkt_wrs++; 2930 return (0); 2931} 2932 2933/* 2934 * Returns 0 to indicate that m has been accepted into a coalesced tx work 2935 * request. It has either been folded into txpkts or txpkts was flushed and m 2936 * has started a new coalesced work request (as the first frame in a fresh 2937 * txpkts). 2938 * 2939 * Returns non-zero to indicate a failure - caller is responsible for 2940 * transmitting m, if there was anything in txpkts it has been flushed. 
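 * A frame is accepted for coalescing only if it is not a TSO frame, the
 * combined flit count stays within TX_WR_FLITS and within what the
 * available hardware descriptors can hold (eq->avail * 8 flits), and the
 * total payload of the work request stays below 64KB.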
2941 */ 2942static int 2943add_to_txpkts(struct port_info *pi, struct sge_txq *txq, struct txpkts *txpkts, 2944 struct mbuf *m, struct sgl *sgl) 2945{ 2946 struct sge_eq *eq = &txq->eq; 2947 int can_coalesce; 2948 struct tx_sdesc *txsd; 2949 int flits; 2950 2951 TXQ_LOCK_ASSERT_OWNED(txq); 2952 2953 KASSERT(sgl->nsegs, ("%s: can't coalesce imm data", __func__)); 2954 2955 if (txpkts->npkt > 0) { 2956 flits = TXPKTS_PKT_HDR + sgl->nflits; 2957 can_coalesce = m->m_pkthdr.tso_segsz == 0 && 2958 txpkts->nflits + flits <= TX_WR_FLITS && 2959 txpkts->nflits + flits <= eq->avail * 8 && 2960 txpkts->plen + m->m_pkthdr.len < 65536; 2961 2962 if (can_coalesce) { 2963 txpkts->npkt++; 2964 txpkts->nflits += flits; 2965 txpkts->plen += m->m_pkthdr.len; 2966 2967 txsd = &txq->sdesc[eq->pidx]; 2968 txsd->credits++; 2969 2970 return (0); 2971 } 2972 2973 /* 2974 * Couldn't coalesce m into txpkts. The first order of business 2975 * is to send txpkts on its way. Then we'll revisit m. 2976 */ 2977 write_txpkts_wr(txq, txpkts); 2978 } 2979 2980 /* 2981 * Check if we can start a new coalesced tx work request with m as 2982 * the first packet in it. 2983 */ 2984 2985 KASSERT(txpkts->npkt == 0, ("%s: txpkts not empty", __func__)); 2986 2987 flits = TXPKTS_WR_HDR + sgl->nflits; 2988 can_coalesce = m->m_pkthdr.tso_segsz == 0 && 2989 flits <= eq->avail * 8 && flits <= TX_WR_FLITS; 2990 2991 if (can_coalesce == 0) 2992 return (EINVAL); 2993 2994 /* 2995 * Start a fresh coalesced tx WR with m as the first frame in it. 2996 */ 2997 txpkts->npkt = 1; 2998 txpkts->nflits = flits; 2999 txpkts->flitp = &eq->desc[eq->pidx].flit[2]; 3000 txpkts->plen = m->m_pkthdr.len; 3001 3002 txsd = &txq->sdesc[eq->pidx]; 3003 txsd->credits = 1; 3004 3005 return (0); 3006} 3007 3008/* 3009 * Note that write_txpkts_wr can never run out of hardware descriptors (but 3010 * write_txpkt_wr can). add_to_txpkts ensures that a frame is accepted for 3011 * coalescing only if sufficient hardware descriptors are available. 
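 * That is why this function only asserts on eq->avail below instead of
 * returning ENOMEM the way write_txpkt_wr does.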
3012 */ 3013static void 3014write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts) 3015{ 3016 struct sge_eq *eq = &txq->eq; 3017 struct fw_eth_tx_pkts_wr *wr; 3018 struct tx_sdesc *txsd; 3019 uint32_t ctrl; 3020 int ndesc; 3021 3022 TXQ_LOCK_ASSERT_OWNED(txq); 3023 3024 ndesc = howmany(txpkts->nflits, 8); 3025 3026 wr = (void *)&eq->desc[eq->pidx]; 3027 wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR)); 3028 ctrl = V_FW_WR_LEN16(howmany(txpkts->nflits, 2)); 3029 if (eq->avail == ndesc) { 3030 if (!(eq->flags & EQ_CRFLUSHED)) { 3031 ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ; 3032 eq->flags |= EQ_CRFLUSHED; 3033 } 3034 eq->flags |= EQ_STALLED; 3035 } 3036 wr->equiq_to_len16 = htobe32(ctrl); 3037 wr->plen = htobe16(txpkts->plen); 3038 wr->npkt = txpkts->npkt; 3039 wr->r3 = wr->type = 0; 3040 3041 /* Everything else already written */ 3042 3043 txsd = &txq->sdesc[eq->pidx]; 3044 txsd->desc_used = ndesc; 3045 3046 KASSERT(eq->avail >= ndesc, ("%s: out of descriptors", __func__)); 3047 3048 eq->pending += ndesc; 3049 eq->avail -= ndesc; 3050 eq->pidx += ndesc; 3051 if (eq->pidx >= eq->cap) 3052 eq->pidx -= eq->cap; 3053 3054 txq->txpkts_pkts += txpkts->npkt; 3055 txq->txpkts_wrs++; 3056 txpkts->npkt = 0; /* emptied */ 3057} 3058 3059static inline void 3060write_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq, 3061 struct txpkts *txpkts, struct mbuf *m, struct sgl *sgl) 3062{ 3063 struct ulp_txpkt *ulpmc; 3064 struct ulptx_idata *ulpsc; 3065 struct cpl_tx_pkt_core *cpl; 3066 struct sge_eq *eq = &txq->eq; 3067 uintptr_t flitp, start, end; 3068 uint64_t ctrl; 3069 caddr_t dst; 3070 3071 KASSERT(txpkts->npkt > 0, ("%s: txpkts is empty", __func__)); 3072 3073 start = (uintptr_t)eq->desc; 3074 end = (uintptr_t)eq->spg; 3075 3076 /* Checksum offload */ 3077 ctrl = 0; 3078 if (!(m->m_pkthdr.csum_flags & CSUM_IP)) 3079 ctrl |= F_TXPKT_IPCSUM_DIS; 3080 if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))) 3081 ctrl |= F_TXPKT_L4CSUM_DIS; 3082 if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP)) 3083 txq->txcsum++; /* some hardware assistance provided */ 3084 3085 /* VLAN tag insertion */ 3086 if (m->m_flags & M_VLANTAG) { 3087 ctrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag); 3088 txq->vlan_insertion++; 3089 } 3090 3091 /* 3092 * The previous packet's SGL must have ended at a 16 byte boundary (this 3093 * is required by the firmware/hardware). It follows that flitp cannot 3094 * wrap around between the ULPTX master command and ULPTX subcommand (8 3095 * bytes each), and that it can not wrap around in the middle of the 3096 * cpl_tx_pkt_core either. 
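 * Since flitp only advances in 16 byte steps in this function, the only
 * places it can land exactly on the end of the ring are the two explicit
 * checks below, where it is wrapped back to the start of the descriptor
 * ring.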
3097 */ 3098 flitp = (uintptr_t)txpkts->flitp; 3099 KASSERT((flitp & 0xf) == 0, 3100 ("%s: last SGL did not end at 16 byte boundary: %p", 3101 __func__, txpkts->flitp)); 3102 3103 /* ULP master command */ 3104 ulpmc = (void *)flitp; 3105 ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0) | 3106 V_ULP_TXPKT_FID(eq->iqid)); 3107 ulpmc->len = htonl(howmany(sizeof(*ulpmc) + sizeof(*ulpsc) + 3108 sizeof(*cpl) + 8 * sgl->nflits, 16)); 3109 3110 /* ULP subcommand */ 3111 ulpsc = (void *)(ulpmc + 1); 3112 ulpsc->cmd_more = htobe32(V_ULPTX_CMD((u32)ULP_TX_SC_IMM) | 3113 F_ULP_TX_SC_MORE); 3114 ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core)); 3115 3116 flitp += sizeof(*ulpmc) + sizeof(*ulpsc); 3117 if (flitp == end) 3118 flitp = start; 3119 3120 /* CPL_TX_PKT */ 3121 cpl = (void *)flitp; 3122 cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | 3123 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf)); 3124 cpl->pack = 0; 3125 cpl->len = htobe16(m->m_pkthdr.len); 3126 cpl->ctrl1 = htobe64(ctrl); 3127 3128 flitp += sizeof(*cpl); 3129 if (flitp == end) 3130 flitp = start; 3131 3132 /* SGL for this frame */ 3133 dst = (caddr_t)flitp; 3134 txpkts->nflits += write_sgl_to_txd(eq, sgl, &dst); 3135 txpkts->flitp = (void *)dst; 3136 3137 KASSERT(((uintptr_t)dst & 0xf) == 0, 3138 ("%s: SGL ends at %p (not a 16 byte boundary)", __func__, dst)); 3139} 3140 3141/* 3142 * If the SGL ends on an address that is not 16 byte aligned, this function will 3143 * add a 0 filled flit at the end. It returns 1 in that case. 3144 */ 3145static int 3146write_sgl_to_txd(struct sge_eq *eq, struct sgl *sgl, caddr_t *to) 3147{ 3148 __be64 *flitp, *end; 3149 struct ulptx_sgl *usgl; 3150 bus_dma_segment_t *seg; 3151 int i, padded; 3152 3153 KASSERT(sgl->nsegs > 0 && sgl->nflits > 0, 3154 ("%s: bad SGL - nsegs=%d, nflits=%d", 3155 __func__, sgl->nsegs, sgl->nflits)); 3156 3157 KASSERT(((uintptr_t)(*to) & 0xf) == 0, 3158 ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to)); 3159 3160 flitp = (__be64 *)(*to); 3161 end = flitp + sgl->nflits; 3162 seg = &sgl->seg[0]; 3163 usgl = (void *)flitp; 3164 3165 /* 3166 * We start at a 16 byte boundary somewhere inside the tx descriptor 3167 * ring, so we're at least 16 bytes away from the status page. There is 3168 * no chance of a wrap around in the middle of usgl (which is 16 bytes). 
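 * Only the first two flits (the ULPTX header plus len0, then addr0) are
 * written through usgl unconditionally; if the rest of the SGL would run
 * past the status page it is written out flit by flit instead, wrapping
 * back to the start of the ring as needed.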
3169 */ 3170 3171 usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | 3172 V_ULPTX_NSGE(sgl->nsegs)); 3173 usgl->len0 = htobe32(seg->ds_len); 3174 usgl->addr0 = htobe64(seg->ds_addr); 3175 seg++; 3176 3177 if ((uintptr_t)end <= (uintptr_t)eq->spg) { 3178 3179 /* Won't wrap around at all */ 3180 3181 for (i = 0; i < sgl->nsegs - 1; i++, seg++) { 3182 usgl->sge[i / 2].len[i & 1] = htobe32(seg->ds_len); 3183 usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ds_addr); 3184 } 3185 if (i & 1) 3186 usgl->sge[i / 2].len[1] = htobe32(0); 3187 } else { 3188 3189 /* Will wrap somewhere in the rest of the SGL */ 3190 3191 /* 2 flits already written, write the rest flit by flit */ 3192 flitp = (void *)(usgl + 1); 3193 for (i = 0; i < sgl->nflits - 2; i++) { 3194 if ((uintptr_t)flitp == (uintptr_t)eq->spg) 3195 flitp = (void *)eq->desc; 3196 *flitp++ = get_flit(seg, sgl->nsegs - 1, i); 3197 } 3198 end = flitp; 3199 } 3200 3201 if ((uintptr_t)end & 0xf) { 3202 *(uint64_t *)end = 0; 3203 end++; 3204 padded = 1; 3205 } else 3206 padded = 0; 3207 3208 if ((uintptr_t)end == (uintptr_t)eq->spg) 3209 *to = (void *)eq->desc; 3210 else 3211 *to = (void *)end; 3212 3213 return (padded); 3214} 3215 3216static inline void 3217copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len) 3218{ 3219 if (__predict_true((uintptr_t)(*to) + len <= (uintptr_t)eq->spg)) { 3220 bcopy(from, *to, len); 3221 (*to) += len; 3222 } else { 3223 int portion = (uintptr_t)eq->spg - (uintptr_t)(*to); 3224 3225 bcopy(from, *to, portion); 3226 from += portion; 3227 portion = len - portion; /* remaining */ 3228 bcopy(from, (void *)eq->desc, portion); 3229 (*to) = (caddr_t)eq->desc + portion; 3230 } 3231} 3232 3233static inline void 3234ring_eq_db(struct adapter *sc, struct sge_eq *eq) 3235{ 3236 wmb(); 3237 t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), 3238 V_QID(eq->cntxt_id) | V_PIDX(eq->pending)); 3239 eq->pending = 0; 3240} 3241 3242static inline int 3243reclaimable(struct sge_eq *eq) 3244{ 3245 unsigned int cidx; 3246 3247 cidx = eq->spg->cidx; /* stable snapshot */ 3248 cidx = be16toh(cidx); 3249 3250 if (cidx >= eq->cidx) 3251 return (cidx - eq->cidx); 3252 else 3253 return (cidx + eq->cap - eq->cidx); 3254} 3255 3256/* 3257 * There are "can_reclaim" tx descriptors ready to be reclaimed. Reclaim as 3258 * many as possible but stop when there are around "n" mbufs to free. 3259 * 3260 * The actual number reclaimed is provided as the return value. 3261 */ 3262static int 3263reclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n) 3264{ 3265 struct tx_sdesc *txsd; 3266 struct tx_maps *txmaps; 3267 struct tx_map *txm; 3268 unsigned int reclaimed, maps; 3269 struct sge_eq *eq = &txq->eq; 3270 3271 TXQ_LOCK_ASSERT_OWNED(txq); 3272 3273 if (can_reclaim == 0) 3274 can_reclaim = reclaimable(eq); 3275 3276 maps = reclaimed = 0; 3277 while (can_reclaim && maps < n) { 3278 int ndesc; 3279 3280 txsd = &txq->sdesc[eq->cidx]; 3281 ndesc = txsd->desc_used; 3282 3283 /* Firmware doesn't return "partial" credits. 
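 * The cidx it reports in the status page always lands on a work request
 * boundary, so the descriptors recorded in txsd->desc_used are reclaimed
 * either in full or not at all.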
*/ 3284 KASSERT(can_reclaim >= ndesc, 3285 ("%s: unexpected number of credits: %d, %d", 3286 __func__, can_reclaim, ndesc)); 3287 3288 maps += txsd->credits; 3289 3290 reclaimed += ndesc; 3291 can_reclaim -= ndesc; 3292 3293 eq->cidx += ndesc; 3294 if (__predict_false(eq->cidx >= eq->cap)) 3295 eq->cidx -= eq->cap; 3296 } 3297 3298 txmaps = &txq->txmaps; 3299 txm = &txmaps->maps[txmaps->map_cidx]; 3300 if (maps) 3301 prefetch(txm->m); 3302 3303 eq->avail += reclaimed; 3304 KASSERT(eq->avail < eq->cap, /* avail tops out at (cap - 1) */ 3305 ("%s: too many descriptors available", __func__)); 3306 3307 txmaps->map_avail += maps; 3308 KASSERT(txmaps->map_avail <= txmaps->map_total, 3309 ("%s: too many maps available", __func__)); 3310 3311 while (maps--) { 3312 struct tx_map *next; 3313 3314 next = txm + 1; 3315 if (__predict_false(txmaps->map_cidx + 1 == txmaps->map_total)) 3316 next = txmaps->maps; 3317 prefetch(next->m); 3318 3319 bus_dmamap_unload(txq->tx_tag, txm->map); 3320 m_freem(txm->m); 3321 txm->m = NULL; 3322 3323 txm = next; 3324 if (__predict_false(++txmaps->map_cidx == txmaps->map_total)) 3325 txmaps->map_cidx = 0; 3326 } 3327 3328 return (reclaimed); 3329} 3330 3331static void 3332write_eqflush_wr(struct sge_eq *eq) 3333{ 3334 struct fw_eq_flush_wr *wr; 3335 3336 EQ_LOCK_ASSERT_OWNED(eq); 3337 KASSERT(eq->avail > 0, ("%s: no descriptors left.", __func__)); 3338 KASSERT(!(eq->flags & EQ_CRFLUSHED), ("%s: flushed already", __func__)); 3339 3340 wr = (void *)&eq->desc[eq->pidx]; 3341 bzero(wr, sizeof(*wr)); 3342 wr->opcode = FW_EQ_FLUSH_WR; 3343 wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(sizeof(*wr) / 16) | 3344 F_FW_WR_EQUEQ | F_FW_WR_EQUIQ); 3345 3346 eq->flags |= (EQ_CRFLUSHED | EQ_STALLED); 3347 eq->pending++; 3348 eq->avail--; 3349 if (++eq->pidx == eq->cap) 3350 eq->pidx = 0; 3351} 3352 3353static __be64 3354get_flit(bus_dma_segment_t *sgl, int nsegs, int idx) 3355{ 3356 int i = (idx / 3) * 2; 3357 3358 switch (idx % 3) { 3359 case 0: { 3360 __be64 rc; 3361 3362 rc = htobe32(sgl[i].ds_len); 3363 if (i + 1 < nsegs) 3364 rc |= (uint64_t)htobe32(sgl[i + 1].ds_len) << 32; 3365 3366 return (rc); 3367 } 3368 case 1: 3369 return htobe64(sgl[i].ds_addr); 3370 case 2: 3371 return htobe64(sgl[i + 1].ds_addr); 3372 } 3373 3374 return (0); 3375} 3376 3377static void 3378set_fl_tag_idx(struct sge_fl *fl, int bufsize) 3379{ 3380 int i; 3381 3382 for (i = 0; i < FL_BUF_SIZES - 1; i++) { 3383 if (FL_BUF_SIZE(i) >= bufsize) 3384 break; 3385 } 3386 3387 fl->tag_idx = i; 3388} 3389 3390static void 3391add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl) 3392{ 3393 mtx_lock(&sc->sfl_lock); 3394 FL_LOCK(fl); 3395 if ((fl->flags & FL_DOOMED) == 0) { 3396 fl->flags |= FL_STARVING; 3397 TAILQ_INSERT_TAIL(&sc->sfl, fl, link); 3398 callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc); 3399 } 3400 FL_UNLOCK(fl); 3401 mtx_unlock(&sc->sfl_lock); 3402} 3403 3404static int 3405handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss, 3406 struct mbuf *m) 3407{ 3408 const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1); 3409 unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid)); 3410 struct adapter *sc = iq->adapter; 3411 struct sge *s = &sc->sge; 3412 struct sge_eq *eq; 3413 3414 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, 3415 rss->opcode)); 3416 3417 eq = s->eqmap[qid - s->eq_start]; 3418 EQ_LOCK(eq); 3419 KASSERT(eq->flags & EQ_CRFLUSHED, 3420 ("%s: unsolicited egress update", __func__)); 3421 eq->flags &= ~EQ_CRFLUSHED; 3422 eq->egr_update++; 3423 3424 if 
(__predict_false(eq->flags & EQ_DOOMED)) 3425 wakeup_one(eq); 3426 else if (eq->flags & EQ_STALLED && can_resume_tx(eq)) 3427 taskqueue_enqueue(sc->tq[eq->tx_chan], &eq->tx_task); 3428 EQ_UNLOCK(eq); 3429 3430 return (0); 3431} 3432 3433static int 3434handle_fw_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 3435{ 3436 const struct cpl_fw6_msg *cpl = (const void *)(rss + 1); 3437 3438 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, 3439 rss->opcode)); 3440 3441 if (cpl->type == FW6_TYPE_CMD_RPL) 3442 t4_handle_fw_rpl(iq->adapter, cpl->data); 3443 3444 return (0); 3445} 3446 3447static int 3448sysctl_uint16(SYSCTL_HANDLER_ARGS) 3449{ 3450 uint16_t *id = arg1; 3451 int i = *id; 3452 3453 return sysctl_handle_int(oidp, &i, 0, req); 3454} 3455
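/*
 * sysctl_uint16 exists so the 16-bit queue indices exported earlier in this
 * file (abs_id, cidx, pidx) can be read through the int-based
 * sysctl_handle_int: it is registered via SYSCTL_ADD_PROC with
 * CTLTYPE_INT | CTLFLAG_RD and simply widens the uint16_t into a local int.
 */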