t4_sge.c revision 237799
/*-
 * Copyright (c) 2011 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/cxgbe/t4_sge.c 237799 2012-06-29 16:50:52Z np $");

#include "opt_inet.h"

#include <sys/types.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/kdb.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/sysctl.h>
#include <sys/smp.h>
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include "common/common.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "common/t4_msg.h"

struct fl_buf_info {
    int size;
    int type;
    uma_zone_t zone;
};

/* Filled up by t4_sge_modload */
static struct fl_buf_info fl_buf_info[FL_BUF_SIZES];

#define FL_BUF_SIZE(x)  (fl_buf_info[x].size)
#define FL_BUF_TYPE(x)  (fl_buf_info[x].type)
#define FL_BUF_ZONE(x)  (fl_buf_info[x].zone)

enum {
    FL_PKTSHIFT = 2
};

static int fl_pad = CACHE_LINE_SIZE;
static int spg_len = 64;
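/*
 * A "flit" is an 8-byte (64-bit) unit; the SGE measures work request
 * headers and SGLs in flits (note the /8 conversions in the TXPKTS_*
 * macros further down).  Both structures below count flits.
 */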
/* Used to track coalesced tx work request */
struct txpkts {
    uint64_t *flitp;    /* ptr to flit where next pkt should start */
    uint8_t npkt;       /* # of packets in this work request */
    uint8_t nflits;     /* # of flits used by this work request */
    uint16_t plen;      /* total payload (sum of all packets) */
};

/* A packet's SGL. This + m_pkthdr has all info needed for tx */
struct sgl {
    int nsegs;          /* # of segments in the SGL, 0 means imm. tx */
    int nflits;         /* # of flits needed for the SGL */
    bus_dma_segment_t seg[TX_SGL_SEGS];
};

static int service_iq(struct sge_iq *, int);
static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t,
    int *);
static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *);
static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int,
    int, char *);
static inline void init_fl(struct sge_fl *, int, int, char *);
static inline void init_eq(struct sge_eq *, int, int, uint8_t, uint16_t,
    char *);
static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *,
    bus_addr_t *, void **);
static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
    void *);
static int alloc_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *,
    int, int);
static int free_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *);
static int alloc_fwq(struct adapter *);
static int free_fwq(struct adapter *);
static int alloc_mgmtq(struct adapter *);
static int free_mgmtq(struct adapter *);
static int alloc_rxq(struct port_info *, struct sge_rxq *, int, int,
    struct sysctl_oid *);
static int free_rxq(struct port_info *, struct sge_rxq *);
#ifdef TCP_OFFLOAD
static int alloc_ofld_rxq(struct port_info *, struct sge_ofld_rxq *, int, int,
    struct sysctl_oid *);
static int free_ofld_rxq(struct port_info *, struct sge_ofld_rxq *);
#endif
static int ctrl_eq_alloc(struct adapter *, struct sge_eq *);
static int eth_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *);
#ifdef TCP_OFFLOAD
static int ofld_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *);
#endif
static int alloc_eq(struct adapter *, struct port_info *, struct sge_eq *);
static int free_eq(struct adapter *, struct sge_eq *);
static int alloc_wrq(struct adapter *, struct port_info *, struct sge_wrq *,
    struct sysctl_oid *);
static int free_wrq(struct adapter *, struct sge_wrq *);
static int alloc_txq(struct port_info *, struct sge_txq *, int,
    struct sysctl_oid *);
static int free_txq(struct port_info *, struct sge_txq *);
static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int);
static inline bool is_new_response(const struct sge_iq *, struct rsp_ctrl **);
static inline void iq_next(struct sge_iq *);
static inline void ring_fl_db(struct adapter *, struct sge_fl *);
static int refill_fl(struct adapter *, struct sge_fl *, int);
static void refill_sfl(void *);
static int alloc_fl_sdesc(struct sge_fl *);
static void free_fl_sdesc(struct sge_fl *);
static void set_fl_tag_idx(struct sge_fl *, int);
static void add_fl_to_sfl(struct adapter *, struct sge_fl *);

static int get_pkt_sgl(struct sge_txq *, struct mbuf **, struct sgl *, int);
static int free_pkt_sgl(struct sge_txq *, struct sgl *);
static int write_txpkt_wr(struct port_info *, struct sge_txq *, struct mbuf *,
    struct sgl *);
static int add_to_txpkts(struct port_info *, struct sge_txq *, struct txpkts *,
    struct mbuf *, struct sgl *);
static void write_txpkts_wr(struct sge_txq *, struct txpkts *);
static inline void write_ulp_cpl_sgl(struct port_info *, struct sge_txq *,
    struct txpkts *, struct mbuf *, struct sgl *);
static int write_sgl_to_txd(struct sge_eq *, struct sgl *, caddr_t *);
static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int);
static inline void ring_eq_db(struct adapter *, struct sge_eq *);
static inline int reclaimable(struct sge_eq *);
static int reclaim_tx_descs(struct sge_txq *, int, int);
static void write_eqflush_wr(struct sge_eq *);
static __be64 get_flit(bus_dma_segment_t *, int, int);
static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *,
    struct mbuf *);
static int handle_fw_rpl(struct sge_iq *, const struct rss_header *,
    struct mbuf *);

static int sysctl_uint16(SYSCTL_HANDLER_ARGS);

#if defined(__i386__) || defined(__amd64__)
extern u_int cpu_clflush_line_size;
#endif
/*
 * Called on MOD_LOAD and fills up fl_buf_info[].
 */
void
t4_sge_modload(void)
{
    int i;
    int bufsize[FL_BUF_SIZES] = {
        MCLBYTES,
#if MJUMPAGESIZE != MCLBYTES
        MJUMPAGESIZE,
#endif
        MJUM9BYTES,
        MJUM16BYTES
    };

    for (i = 0; i < FL_BUF_SIZES; i++) {
        FL_BUF_SIZE(i) = bufsize[i];
        FL_BUF_TYPE(i) = m_gettype(bufsize[i]);
        FL_BUF_ZONE(i) = m_getzone(bufsize[i]);
    }

#if defined(__i386__) || defined(__amd64__)
    fl_pad = max(cpu_clflush_line_size, 32);
    spg_len = cpu_clflush_line_size > 64 ? 128 : 64;
#endif
}

/**
 * t4_sge_init - initialize SGE
 * @sc: the adapter
 *
 * Performs SGE initialization needed every time after a chip reset.
 * We do not initialize any of the queues here; instead the driver
 * top-level must request them individually.
 */
int
t4_sge_init(struct adapter *sc)
{
    struct sge *s = &sc->sge;
    int i, rc = 0;
    uint32_t ctrl_mask, ctrl_val, hpsize, v;
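    /*
     * Values we expect in SGE_CONTROL (and program into it if this is the
     * master PF): rx payloads shifted up by FL_PKTSHIFT bytes, freelist
     * buffers padded to an fl_pad byte boundary (the INGPADBOUNDARY field
     * encodes 32 << value, hence ilog2(fl_pad) - 5), and an egress status
     * page of spg_len bytes (EGRSTATUSPAGESIZE set selects 128 instead of
     * the default 64).
     */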
    ctrl_mask = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE |
        V_INGPADBOUNDARY(M_INGPADBOUNDARY) |
        F_EGRSTATUSPAGESIZE;
    ctrl_val = V_PKTSHIFT(FL_PKTSHIFT) | F_RXPKTCPLMODE |
        V_INGPADBOUNDARY(ilog2(fl_pad) - 5) |
        V_EGRSTATUSPAGESIZE(spg_len == 128);

    hpsize = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
        V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
        V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) |
        V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) |
        V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) |
        V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) |
        V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) |
        V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10);

    if (sc->flags & MASTER_PF) {
        int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200};
        int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32};  /* 63 max */

        t4_set_reg_field(sc, A_SGE_CONTROL, ctrl_mask, ctrl_val);
        t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, hpsize);
        for (i = 0; i < FL_BUF_SIZES; i++) {
            t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i),
                FL_BUF_SIZE(i));
        }

        t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD,
            V_THRESHOLD_0(intr_pktcount[0]) |
            V_THRESHOLD_1(intr_pktcount[1]) |
            V_THRESHOLD_2(intr_pktcount[2]) |
            V_THRESHOLD_3(intr_pktcount[3]));

        t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1,
            V_TIMERVALUE0(us_to_core_ticks(sc, intr_timer[0])) |
            V_TIMERVALUE1(us_to_core_ticks(sc, intr_timer[1])));
        t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3,
            V_TIMERVALUE2(us_to_core_ticks(sc, intr_timer[2])) |
            V_TIMERVALUE3(us_to_core_ticks(sc, intr_timer[3])));
        t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5,
            V_TIMERVALUE4(us_to_core_ticks(sc, intr_timer[4])) |
            V_TIMERVALUE5(us_to_core_ticks(sc, intr_timer[5])));
    }

    v = t4_read_reg(sc, A_SGE_CONTROL);
    if ((v & ctrl_mask) != ctrl_val) {
        device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", v);
        rc = EINVAL;
    }

    v = t4_read_reg(sc, A_SGE_HOST_PAGE_SIZE);
    if (v != hpsize) {
        device_printf(sc->dev, "invalid SGE_HOST_PAGE_SIZE(0x%x)\n", v);
        rc = EINVAL;
    }

    for (i = 0; i < FL_BUF_SIZES; i++) {
        v = t4_read_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i));
        if (v != FL_BUF_SIZE(i)) {
            device_printf(sc->dev,
                "invalid SGE_FL_BUFFER_SIZE[%d](0x%x)\n", i, v);
            rc = EINVAL;
        }
    }
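    /*
     * The SGE considers a freelist starved once its buffer count drops to
     * the egress threshold, so keep the driver's low-water mark above it.
     * The EGRTHRESHOLD field appears to be in units of 2 buffers, hence
     * the * 2 + 1.
     */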
    v = t4_read_reg(sc, A_SGE_CONM_CTRL);
    s->fl_starve_threshold = G_EGRTHRESHOLD(v) * 2 + 1;

    v = t4_read_reg(sc, A_SGE_INGRESS_RX_THRESHOLD);
    sc->sge.counter_val[0] = G_THRESHOLD_0(v);
    sc->sge.counter_val[1] = G_THRESHOLD_1(v);
    sc->sge.counter_val[2] = G_THRESHOLD_2(v);
    sc->sge.counter_val[3] = G_THRESHOLD_3(v);

    v = t4_read_reg(sc, A_SGE_TIMER_VALUE_0_AND_1);
    sc->sge.timer_val[0] = G_TIMERVALUE0(v) / core_ticks_per_usec(sc);
    sc->sge.timer_val[1] = G_TIMERVALUE1(v) / core_ticks_per_usec(sc);
    v = t4_read_reg(sc, A_SGE_TIMER_VALUE_2_AND_3);
    sc->sge.timer_val[2] = G_TIMERVALUE2(v) / core_ticks_per_usec(sc);
    sc->sge.timer_val[3] = G_TIMERVALUE3(v) / core_ticks_per_usec(sc);
    v = t4_read_reg(sc, A_SGE_TIMER_VALUE_4_AND_5);
    sc->sge.timer_val[4] = G_TIMERVALUE4(v) / core_ticks_per_usec(sc);
    sc->sge.timer_val[5] = G_TIMERVALUE5(v) / core_ticks_per_usec(sc);

    t4_register_cpl_handler(sc, CPL_FW4_MSG, handle_fw_rpl);
    t4_register_cpl_handler(sc, CPL_FW6_MSG, handle_fw_rpl);
    t4_register_cpl_handler(sc, CPL_SGE_EGR_UPDATE, handle_sge_egr_update);
    t4_register_cpl_handler(sc, CPL_RX_PKT, t4_eth_rx);

    return (rc);
}

int
t4_create_dma_tag(struct adapter *sc)
{
    int rc;

    rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0,
        BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE,
        BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL,
        NULL, &sc->dmat);
    if (rc != 0) {
        device_printf(sc->dev,
            "failed to create main DMA tag: %d\n", rc);
    }

    return (rc);
}

int
t4_destroy_dma_tag(struct adapter *sc)
{
    if (sc->dmat)
        bus_dma_tag_destroy(sc->dmat);

    return (0);
}

/*
 * Allocate and initialize the firmware event queue and the management queue.
 *
 * Returns errno on failure. Resources allocated up to that point may still be
 * allocated. Caller is responsible for cleanup in case this function fails.
 */
int
t4_setup_adapter_queues(struct adapter *sc)
{
    int rc;

    ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

    sysctl_ctx_init(&sc->ctx);
    sc->flags |= ADAP_SYSCTL_CTX;

    /*
     * Firmware event queue
     */
    rc = alloc_fwq(sc);
    if (rc != 0) {
        device_printf(sc->dev,
            "failed to create firmware event queue: %d\n", rc);
        return (rc);
    }

    /*
     * Management queue. This is just a control queue that uses the fwq as
     * its associated iq.
     */
    rc = alloc_mgmtq(sc);
    if (rc != 0) {
        device_printf(sc->dev,
            "failed to create management queue: %d\n", rc);
        return (rc);
    }

    return (rc);
}

/*
 * Idempotent
 */
int
t4_teardown_adapter_queues(struct adapter *sc)
{

    ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

    /* Do this before freeing the queue */
    if (sc->flags & ADAP_SYSCTL_CTX) {
        sysctl_ctx_free(&sc->ctx);
        sc->flags &= ~ADAP_SYSCTL_CTX;
    }

    free_mgmtq(sc);
    free_fwq(sc);

    return (0);
}
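/*
 * First interrupt vector available to this port's queues.  The first
 * T4_EXTRA_INTR vectors are reserved for adapter-wide interrupt sources;
 * beyond that, each earlier port consumes one vector per rx queue that
 * takes direct interrupts.
 */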
static inline int
first_vector(struct port_info *pi)
{
    struct adapter *sc = pi->adapter;
    int rc = T4_EXTRA_INTR, i;

    if (sc->intr_count == 1)
        return (0);

    for_each_port(sc, i) {
        if (i == pi->port_id)
            break;

#ifdef TCP_OFFLOAD
        if (sc->flags & INTR_DIRECT)
            rc += pi->nrxq + pi->nofldrxq;
        else
            rc += max(pi->nrxq, pi->nofldrxq);
#else
        /*
         * Not compiled with offload support and intr_count > 1. Only
         * NIC queues exist and they'd better be taking direct
         * interrupts.
         */
        KASSERT(sc->flags & INTR_DIRECT,
            ("%s: intr_count %d, !INTR_DIRECT", __func__,
            sc->intr_count));

        rc += pi->nrxq;
#endif
    }

    return (rc);
}

/*
 * Given an arbitrary "index," come up with an iq that can be used by other
 * queues (of this port) for interrupt forwarding, SGE egress updates, etc.
 * The iq returned is guaranteed to be something that takes direct interrupts.
 */
static struct sge_iq *
port_intr_iq(struct port_info *pi, int idx)
{
    struct adapter *sc = pi->adapter;
    struct sge *s = &sc->sge;
    struct sge_iq *iq = NULL;

    if (sc->intr_count == 1)
        return (&sc->sge.fwq);

#ifdef TCP_OFFLOAD
    if (sc->flags & INTR_DIRECT) {
        idx %= pi->nrxq + pi->nofldrxq;

        if (idx >= pi->nrxq) {
            idx -= pi->nrxq;
            iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq;
        } else
            iq = &s->rxq[pi->first_rxq + idx].iq;

    } else {
        idx %= max(pi->nrxq, pi->nofldrxq);

        if (pi->nrxq >= pi->nofldrxq)
            iq = &s->rxq[pi->first_rxq + idx].iq;
        else
            iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq;
    }
#else
    /*
     * Not compiled with offload support and intr_count > 1. Only NIC
     * queues exist and they'd better be taking direct interrupts.
     */
    KASSERT(sc->flags & INTR_DIRECT,
        ("%s: intr_count %d, !INTR_DIRECT", __func__, sc->intr_count));

    idx %= pi->nrxq;
    iq = &s->rxq[pi->first_rxq + idx].iq;
#endif

    KASSERT(iq->flags & IQ_INTR, ("%s: EDOOFUS", __func__));
    return (iq);
}
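/*
 * Allocate all the queues that belong to the port: rx queues that take
 * direct interrupts first (so forwarded interrupts have somewhere to go),
 * then the remaining rx queues, the tx queues, and finally the port's
 * control queue.  On failure the cleanup is done here, via
 * t4_teardown_port_queues.
 */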
int
t4_setup_port_queues(struct port_info *pi)
{
    int rc = 0, i, j, intr_idx, iqid;
    struct sge_rxq *rxq;
    struct sge_txq *txq;
    struct sge_wrq *ctrlq;
#ifdef TCP_OFFLOAD
    struct sge_ofld_rxq *ofld_rxq;
    struct sge_wrq *ofld_txq;
    struct sysctl_oid *oid2 = NULL;
#endif
    char name[16];
    struct adapter *sc = pi->adapter;
    struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev);
    struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);

    oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "rxq", CTLFLAG_RD,
        NULL, "rx queues");

#ifdef TCP_OFFLOAD
    if (is_offload(sc)) {
        oid2 = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_rxq",
            CTLFLAG_RD, NULL,
            "rx queues for offloaded TCP connections");
    }
#endif

    /* Interrupt vector to start from (when using multiple vectors) */
    intr_idx = first_vector(pi);

    /*
     * First pass over all rx queues (NIC and TOE):
     * a) initialize iq and fl
     * b) allocate queue iff it will take direct interrupts.
     */
    for_each_rxq(pi, i, rxq) {

        snprintf(name, sizeof(name), "%s rxq%d-iq",
            device_get_nameunit(pi->dev), i);
        init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, pi->qsize_rxq,
            RX_IQ_ESIZE, name);

        snprintf(name, sizeof(name), "%s rxq%d-fl",
            device_get_nameunit(pi->dev), i);
        init_fl(&rxq->fl, pi->qsize_rxq / 8, pi->ifp->if_mtu, name);

        if (sc->flags & INTR_DIRECT
#ifdef TCP_OFFLOAD
            || (sc->intr_count > 1 && pi->nrxq >= pi->nofldrxq)
#endif
            ) {
            rxq->iq.flags |= IQ_INTR;
            rc = alloc_rxq(pi, rxq, intr_idx, i, oid);
            if (rc != 0)
                goto done;
            intr_idx++;
        }
    }

#ifdef TCP_OFFLOAD
    for_each_ofld_rxq(pi, i, ofld_rxq) {

        snprintf(name, sizeof(name), "%s ofld_rxq%d-iq",
            device_get_nameunit(pi->dev), i);
        init_iq(&ofld_rxq->iq, sc, pi->tmr_idx, pi->pktc_idx,
            pi->qsize_rxq, RX_IQ_ESIZE, name);

        snprintf(name, sizeof(name), "%s ofld_rxq%d-fl",
            device_get_nameunit(pi->dev), i);
        init_fl(&ofld_rxq->fl, pi->qsize_rxq / 8, MJUM16BYTES, name);

        if (sc->flags & INTR_DIRECT ||
            (sc->intr_count > 1 && pi->nofldrxq > pi->nrxq)) {
            ofld_rxq->iq.flags |= IQ_INTR;
            rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx, i, oid2);
            if (rc != 0)
                goto done;
            intr_idx++;
        }
    }
#endif

    /*
     * Second pass over all rx queues (NIC and TOE). The queues forwarding
     * their interrupts are allocated now.
     */
    j = 0;
    for_each_rxq(pi, i, rxq) {
        if (rxq->iq.flags & IQ_INTR)
            continue;

        intr_idx = port_intr_iq(pi, j)->abs_id;

        rc = alloc_rxq(pi, rxq, intr_idx, i, oid);
        if (rc != 0)
            goto done;
        j++;
    }

#ifdef TCP_OFFLOAD
    for_each_ofld_rxq(pi, i, ofld_rxq) {
        if (ofld_rxq->iq.flags & IQ_INTR)
            continue;

        intr_idx = port_intr_iq(pi, j)->abs_id;

        rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx, i, oid2);
        if (rc != 0)
            goto done;
        j++;
    }
#endif

    /*
     * Now the tx queues. Only one pass needed.
     */
    oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "txq", CTLFLAG_RD,
        NULL, "tx queues");
    j = 0;
    for_each_txq(pi, i, txq) {
        uint16_t iqid;

        iqid = port_intr_iq(pi, j)->cntxt_id;

        snprintf(name, sizeof(name), "%s txq%d",
            device_get_nameunit(pi->dev), i);
        init_eq(&txq->eq, EQ_ETH, pi->qsize_txq, pi->tx_chan, iqid,
            name);

        rc = alloc_txq(pi, txq, i, oid);
        if (rc != 0)
            goto done;
        j++;
    }

#ifdef TCP_OFFLOAD
    oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_txq",
        CTLFLAG_RD, NULL, "tx queues for offloaded TCP connections");
    for_each_ofld_txq(pi, i, ofld_txq) {
        uint16_t iqid;

        iqid = port_intr_iq(pi, j)->cntxt_id;

        snprintf(name, sizeof(name), "%s ofld_txq%d",
            device_get_nameunit(pi->dev), i);
        init_eq(&ofld_txq->eq, EQ_OFLD, pi->qsize_txq, pi->tx_chan,
            iqid, name);

        snprintf(name, sizeof(name), "%d", i);
        oid2 = SYSCTL_ADD_NODE(&pi->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
            name, CTLFLAG_RD, NULL, "offload tx queue");

        rc = alloc_wrq(sc, pi, ofld_txq, oid2);
        if (rc != 0)
            goto done;
        j++;
    }
#endif

    /*
     * Finally, the control queue.
     */
    oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ctrlq", CTLFLAG_RD,
        NULL, "ctrl queue");
    ctrlq = &sc->sge.ctrlq[pi->port_id];
    iqid = port_intr_iq(pi, 0)->cntxt_id;
    snprintf(name, sizeof(name), "%s ctrlq", device_get_nameunit(pi->dev));
    init_eq(&ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, pi->tx_chan, iqid, name);
    rc = alloc_wrq(sc, pi, ctrlq, oid);

done:
    if (rc)
        t4_teardown_port_queues(pi);

    return (rc);
}

/*
 * Idempotent
 */
int
t4_teardown_port_queues(struct port_info *pi)
{
    int i;
    struct adapter *sc = pi->adapter;
    struct sge_rxq *rxq;
    struct sge_txq *txq;
#ifdef TCP_OFFLOAD
    struct sge_ofld_rxq *ofld_rxq;
    struct sge_wrq *ofld_txq;
#endif

    /* Do this before freeing the queues */
    if (pi->flags & PORT_SYSCTL_CTX) {
        sysctl_ctx_free(&pi->ctx);
        pi->flags &= ~PORT_SYSCTL_CTX;
    }

    /*
     * Take down all the tx queues first, as they reference the rx queues
     * (for egress updates, etc.).
     */

    free_wrq(sc, &sc->sge.ctrlq[pi->port_id]);

    for_each_txq(pi, i, txq) {
        free_txq(pi, txq);
    }

#ifdef TCP_OFFLOAD
    for_each_ofld_txq(pi, i, ofld_txq) {
        free_wrq(sc, ofld_txq);
    }
#endif

    /*
     * Then take down the rx queues that forward their interrupts, as they
     * reference other rx queues.
     */

    for_each_rxq(pi, i, rxq) {
        if ((rxq->iq.flags & IQ_INTR) == 0)
            free_rxq(pi, rxq);
    }

#ifdef TCP_OFFLOAD
    for_each_ofld_rxq(pi, i, ofld_rxq) {
        if ((ofld_rxq->iq.flags & IQ_INTR) == 0)
            free_ofld_rxq(pi, ofld_rxq);
    }
#endif

    /*
     * Then take down the rx queues that take direct interrupts.
     */

    for_each_rxq(pi, i, rxq) {
        if (rxq->iq.flags & IQ_INTR)
            free_rxq(pi, rxq);
    }

#ifdef TCP_OFFLOAD
    for_each_ofld_rxq(pi, i, ofld_rxq) {
        if (ofld_rxq->iq.flags & IQ_INTR)
            free_ofld_rxq(pi, ofld_rxq);
    }
#endif

    return (0);
}
/*
 * Deals with errors and the firmware event queue. All data rx queues forward
 * their interrupt to the firmware event queue.
 */
void
t4_intr_all(void *arg)
{
    struct adapter *sc = arg;
    struct sge_iq *fwq = &sc->sge.fwq;

    t4_intr_err(arg);
    if (atomic_cmpset_int(&fwq->state, IQS_IDLE, IQS_BUSY)) {
        service_iq(fwq, 0);
        atomic_cmpset_int(&fwq->state, IQS_BUSY, IQS_IDLE);
    }
}

/* Deals with error interrupts */
void
t4_intr_err(void *arg)
{
    struct adapter *sc = arg;

    t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0);
    t4_slow_intr_handler(sc);
}

void
t4_intr_evt(void *arg)
{
    struct sge_iq *iq = arg;

    if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
        service_iq(iq, 0);
        atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
    }
}

void
t4_intr(void *arg)
{
    struct sge_iq *iq = arg;

    if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
        service_iq(iq, 0);
        atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
    }
}

/*
 * Deals with anything and everything on the given ingress queue.
 */
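/*
 * A budget of 0 means: keep going until the queue is empty.  A non-zero
 * budget limits how many responses are processed before the function
 * returns EINPROGRESS; queues whose interrupts were forwarded here and
 * could not be drained within their budget are parked on a local list
 * and revisited.
 */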
static int
service_iq(struct sge_iq *iq, int budget)
{
    struct sge_iq *q;
    struct sge_rxq *rxq = iq_to_rxq(iq);    /* Use iff iq is part of rxq */
    struct sge_fl *fl = &rxq->fl;           /* Use iff IQ_HAS_FL */
    struct adapter *sc = iq->adapter;
    struct rsp_ctrl *ctrl;
    const struct rss_header *rss;
    int ndescs = 0, limit, fl_bufs_used = 0;
    int rsp_type;
    uint32_t lq;
    struct mbuf *m0;
    STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql);

    limit = budget ? budget : iq->qsize / 8;

    KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq));

    /*
     * We always come back and check the descriptor ring for new indirect
     * interrupts and other responses after running a single handler.
     */
    for (;;) {
        while (is_new_response(iq, &ctrl)) {

            rmb();

            m0 = NULL;
            rsp_type = G_RSPD_TYPE(ctrl->u.type_gen);
            lq = be32toh(ctrl->pldbuflen_qid);
            rss = (const void *)iq->cdesc;

            switch (rsp_type) {
            case X_RSPD_TYPE_FLBUF:

                KASSERT(iq->flags & IQ_HAS_FL,
                    ("%s: data for an iq (%p) with no freelist",
                    __func__, iq));

                m0 = get_fl_payload(sc, fl, lq, &fl_bufs_used);
#ifdef T4_PKT_TIMESTAMP
                /*
                 * 60 bit timestamp for the payload is
                 * *(uint64_t *)m0->m_pktdat. Note that it is
                 * in the leading free-space in the mbuf. The
                 * kernel can clobber it during a pullup,
                 * m_copymdata, etc. You need to make sure that
                 * the mbuf reaches you unmolested if you care
                 * about the timestamp.
                 */
                *(uint64_t *)m0->m_pktdat =
                    be64toh(ctrl->u.last_flit) &
                    0xfffffffffffffff;
#endif

                /* fall through */

            case X_RSPD_TYPE_CPL:
                KASSERT(rss->opcode < NUM_CPL_CMDS,
                    ("%s: bad opcode %02x.", __func__,
                    rss->opcode));
                sc->cpl_handler[rss->opcode](iq, rss, m0);
                break;

            case X_RSPD_TYPE_INTR:

                /*
                 * Interrupts should be forwarded only to queues
                 * that are not forwarding their interrupts.
                 * This means service_iq can recurse but only 1
                 * level deep.
                 */
                KASSERT(budget == 0,
                    ("%s: budget %u, rsp_type %u", __func__,
                    budget, rsp_type));

                q = sc->sge.iqmap[lq - sc->sge.iq_start];
                if (atomic_cmpset_int(&q->state, IQS_IDLE,
                    IQS_BUSY)) {
                    if (service_iq(q, q->qsize / 8) == 0) {
                        atomic_cmpset_int(&q->state,
                            IQS_BUSY, IQS_IDLE);
                    } else {
                        STAILQ_INSERT_TAIL(&iql, q,
                            link);
                    }
                }
                break;

            default:
                sc->an_handler(iq, ctrl);
                break;
            }

            iq_next(iq);
            if (++ndescs == limit) {
                t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
                    V_CIDXINC(ndescs) |
                    V_INGRESSQID(iq->cntxt_id) |
                    V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
                ndescs = 0;

                if (fl_bufs_used > 0) {
                    FL_LOCK(fl);
                    fl->needed += fl_bufs_used;
                    refill_fl(sc, fl, fl->cap / 8);
                    FL_UNLOCK(fl);
                    fl_bufs_used = 0;
                }

                if (budget)
                    return (EINPROGRESS);
            }
        }

        if (STAILQ_EMPTY(&iql))
            break;

        /*
         * Process the head only, and send it to the back of the list if
         * it's still not done.
         */
        q = STAILQ_FIRST(&iql);
        STAILQ_REMOVE_HEAD(&iql, link);
        if (service_iq(q, q->qsize / 8) == 0)
            atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE);
        else
            STAILQ_INSERT_TAIL(&iql, q, link);
    }

#ifdef INET
    if (iq->flags & IQ_LRO_ENABLED) {
        struct lro_ctrl *lro = &rxq->lro;
        struct lro_entry *l;

        while (!SLIST_EMPTY(&lro->lro_active)) {
            l = SLIST_FIRST(&lro->lro_active);
            SLIST_REMOVE_HEAD(&lro->lro_active, next);
            tcp_lro_flush(lro, l);
        }
    }
#endif

    t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) |
        V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));

    if (iq->flags & IQ_HAS_FL) {
        int starved;

        FL_LOCK(fl);
        fl->needed += fl_bufs_used;
        starved = refill_fl(sc, fl, fl->cap / 4);
        FL_UNLOCK(fl);
        if (__predict_false(starved != 0))
            add_fl_to_sfl(sc, fl);
    }

    return (0);
}
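/*
 * Frames shorter than the threshold below are copied into a fresh mbuf and
 * the freelist cluster is recycled; anything larger takes over the cluster
 * itself (m_cljset).  With T4_PKT_TIMESTAMP, 8 bytes of the mbuf's leading
 * free space are reserved for the timestamp.
 */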
#ifdef T4_PKT_TIMESTAMP
#define RX_COPY_THRESHOLD (MINCLSIZE - 8)
#else
#define RX_COPY_THRESHOLD MINCLSIZE
#endif

static struct mbuf *
get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf,
    int *fl_bufs_used)
{
    struct mbuf *m0, *m;
    struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
    unsigned int nbuf, len;

    /*
     * No assertion for the fl lock because we don't need it. This routine
     * is called only from the rx interrupt handler and it only updates
     * fl->cidx. (Contrast that with fl->pidx/fl->needed, which could be
     * updated in the rx interrupt handler or the starvation helper routine.
     * That's why code that manipulates fl->pidx/fl->needed needs the fl
     * lock but this routine does not.)
     */

    if (__predict_false((len_newbuf & F_RSPD_NEWBUF) == 0))
        panic("%s: cannot handle packed frames", __func__);
    len = G_RSPD_LEN(len_newbuf);

    m0 = sd->m;
    sd->m = NULL;    /* consumed */

    bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map, BUS_DMASYNC_POSTREAD);
    m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR);
#ifdef T4_PKT_TIMESTAMP
    /* Leave room for a timestamp */
    m0->m_data += 8;
#endif

    if (len < RX_COPY_THRESHOLD) {
        /* copy data to mbuf, buffer will be recycled */
        bcopy(sd->cl, mtod(m0, caddr_t), len);
        m0->m_len = len;
    } else {
        bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
        m_cljset(m0, sd->cl, FL_BUF_TYPE(sd->tag_idx));
        sd->cl = NULL;    /* consumed */
        m0->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
    }
    m0->m_pkthdr.len = len;

    sd++;
    if (__predict_false(++fl->cidx == fl->cap)) {
        sd = fl->sdesc;
        fl->cidx = 0;
    }

    m = m0;
    len -= m->m_len;
    nbuf = 1;    /* # of fl buffers used */

    while (len > 0) {
        m->m_next = sd->m;
        sd->m = NULL;    /* consumed */
        m = m->m_next;

        bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
            BUS_DMASYNC_POSTREAD);

        m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0);
        if (len <= MLEN) {
            bcopy(sd->cl, mtod(m, caddr_t), len);
            m->m_len = len;
        } else {
            bus_dmamap_unload(fl->tag[sd->tag_idx],
                sd->map);
            m_cljset(m, sd->cl, FL_BUF_TYPE(sd->tag_idx));
            sd->cl = NULL;    /* consumed */
            m->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
        }

        sd++;
        if (__predict_false(++fl->cidx == fl->cap)) {
            sd = fl->sdesc;
            fl->cidx = 0;
        }

        len -= m->m_len;
        nbuf++;
    }

    (*fl_bufs_used) += nbuf;

    return (m0);
}
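/*
 * CPL_RX_PKT handler: strips the FL_PKTSHIFT bytes of padding the chip
 * inserts ahead of the payload, fills in checksum/VLAN/flowid metadata,
 * and hands the frame to LRO or directly to the interface.
 */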
static int
t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0)
{
    struct sge_rxq *rxq = iq_to_rxq(iq);
    struct ifnet *ifp = rxq->ifp;
    const struct cpl_rx_pkt *cpl = (const void *)(rss + 1);
#ifdef INET
    struct lro_ctrl *lro = &rxq->lro;
#endif

    KASSERT(m0 != NULL, ("%s: no payload with opcode %02x", __func__,
        rss->opcode));

    m0->m_pkthdr.len -= FL_PKTSHIFT;
    m0->m_len -= FL_PKTSHIFT;
    m0->m_data += FL_PKTSHIFT;

    m0->m_pkthdr.rcvif = ifp;
    m0->m_flags |= M_FLOWID;
    m0->m_pkthdr.flowid = rss->hash_val;

    if (cpl->csum_calc && !cpl->err_vec) {
        if (ifp->if_capenable & IFCAP_RXCSUM &&
            cpl->l2info & htobe32(F_RXF_IP)) {
            m0->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED |
                CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
            rxq->rxcsum++;
        } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
            cpl->l2info & htobe32(F_RXF_IP6)) {
            m0->m_pkthdr.csum_flags |= (CSUM_DATA_VALID_IPV6 |
                CSUM_PSEUDO_HDR);
            rxq->rxcsum++;
        }

        if (__predict_false(cpl->ip_frag))
            m0->m_pkthdr.csum_data = be16toh(cpl->csum);
        else
            m0->m_pkthdr.csum_data = 0xffff;
    }

    if (cpl->vlan_ex) {
        m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan);
        m0->m_flags |= M_VLANTAG;
        rxq->vlan_extraction++;
    }

#ifdef INET
    if (cpl->l2info & htobe32(F_RXF_LRO) &&
        iq->flags & IQ_LRO_ENABLED &&
        tcp_lro_rx(lro, m0, 0) == 0) {
        /* queued for LRO */
    } else
#endif
    ifp->if_input(ifp, m0);

    return (0);
}
/*
 * Doesn't fail. Holds on to work requests it can't send right away.
 */
void
t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct wrqe *wr)
{
    struct sge_eq *eq = &wrq->eq;
    int can_reclaim;
    caddr_t dst;

    TXQ_LOCK_ASSERT_OWNED(wrq);
#ifdef TCP_OFFLOAD
    KASSERT((eq->flags & EQ_TYPEMASK) == EQ_OFLD ||
        (eq->flags & EQ_TYPEMASK) == EQ_CTRL,
        ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
#else
    KASSERT((eq->flags & EQ_TYPEMASK) == EQ_CTRL,
        ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
#endif

    if (__predict_true(wr != NULL))
        STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link);

    can_reclaim = reclaimable(eq);
    if (__predict_false(eq->flags & EQ_STALLED)) {
        if (can_reclaim < tx_resume_threshold(eq))
            return;
        eq->flags &= ~EQ_STALLED;
        eq->unstalled++;
    }
    eq->cidx += can_reclaim;
    eq->avail += can_reclaim;
    if (__predict_false(eq->cidx >= eq->cap))
        eq->cidx -= eq->cap;

    while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL) {
        int ndesc;

        if (__predict_false(wr->wr_len < 0 ||
            wr->wr_len > SGE_MAX_WR_LEN || (wr->wr_len & 0x7))) {

#ifdef INVARIANTS
            panic("%s: work request with length %d", __func__,
                wr->wr_len);
#endif
#ifdef KDB
            kdb_backtrace();
#endif
            log(LOG_ERR, "%s: %s work request with length %d",
                device_get_nameunit(sc->dev), __func__, wr->wr_len);
            STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
            free_wrqe(wr);
            continue;
        }

        ndesc = howmany(wr->wr_len, EQ_ESIZE);
        if (eq->avail < ndesc) {
            wrq->no_desc++;
            break;
        }

        dst = (void *)&eq->desc[eq->pidx];
        copy_to_txd(eq, wrtod(wr), &dst, wr->wr_len);

        eq->pidx += ndesc;
        eq->avail -= ndesc;
        if (__predict_false(eq->pidx >= eq->cap))
            eq->pidx -= eq->cap;

        eq->pending += ndesc;
        if (eq->pending > 16)
            ring_eq_db(sc, eq);

        wrq->tx_wrs++;
        STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
        free_wrqe(wr);

        if (eq->avail < 8) {
            can_reclaim = reclaimable(eq);
            eq->cidx += can_reclaim;
            eq->avail += can_reclaim;
            if (__predict_false(eq->cidx >= eq->cap))
                eq->cidx -= eq->cap;
        }
    }

    if (eq->pending)
        ring_eq_db(sc, eq);

    if (wr != NULL) {
        eq->flags |= EQ_STALLED;
        if (callout_pending(&eq->tx_callout) == 0)
            callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);
    }
}

/* Per-packet header in a coalesced tx WR, before the SGL starts (in flits) */
#define TXPKTS_PKT_HDR ((\
    sizeof(struct ulp_txpkt) + \
    sizeof(struct ulptx_idata) + \
    sizeof(struct cpl_tx_pkt_core) \
    ) / 8)

/* Header of a coalesced tx WR, before SGL of first packet (in flits) */
#define TXPKTS_WR_HDR (\
    sizeof(struct fw_eth_tx_pkts_wr) / 8 + \
    TXPKTS_PKT_HDR)

/* Header of a tx WR, before SGL of first packet (in flits) */
#define TXPKT_WR_HDR ((\
    sizeof(struct fw_eth_tx_pkt_wr) + \
    sizeof(struct cpl_tx_pkt_core) \
    ) / 8)

/* Header of a tx LSO WR, before SGL of first packet (in flits) */
#define TXPKT_LSO_WR_HDR ((\
    sizeof(struct fw_eth_tx_pkt_wr) + \
    sizeof(struct cpl_tx_pkt_lso_core) + \
    sizeof(struct cpl_tx_pkt_core) \
    ) / 8)

int
t4_eth_tx(struct ifnet *ifp, struct sge_txq *txq, struct mbuf *m)
{
    struct port_info *pi = (void *)ifp->if_softc;
    struct adapter *sc = pi->adapter;
    struct sge_eq *eq = &txq->eq;
    struct buf_ring *br = txq->br;
    struct mbuf *next;
    int rc, coalescing, can_reclaim;
    struct txpkts txpkts;
    struct sgl sgl;

    TXQ_LOCK_ASSERT_OWNED(txq);
    KASSERT(m, ("%s: called with nothing to do.", __func__));
    KASSERT((eq->flags & EQ_TYPEMASK) == EQ_ETH,
        ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));

    prefetch(&eq->desc[eq->pidx]);
    prefetch(&txq->sdesc[eq->pidx]);

    txpkts.npkt = 0;    /* indicates there's nothing in txpkts */
    coalescing = 0;

    can_reclaim = reclaimable(eq);
    if (__predict_false(eq->flags & EQ_STALLED)) {
        if (can_reclaim < tx_resume_threshold(eq)) {
            txq->m = m;
            return (0);
        }
        eq->flags &= ~EQ_STALLED;
        eq->unstalled++;
    }

    if (__predict_false(eq->flags & EQ_DOOMED)) {
        m_freem(m);
        while ((m = buf_ring_dequeue_sc(txq->br)) != NULL)
            m_freem(m);
        return (ENETDOWN);
    }

    if (eq->avail < 8 && can_reclaim)
        reclaim_tx_descs(txq, can_reclaim, 32);
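    /*
     * Main tx loop: map each frame (get_pkt_sgl), try to fold it into the
     * txpkts work request being built, and fall back to an individual
     * txpkt WR when coalescing isn't possible.  The doorbell is rung once
     * 64 descriptors are pending, and completed descriptors are reclaimed
     * opportunistically along the way.
     */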
    for (; m; m = next ? next : drbr_dequeue(ifp, br)) {

        if (eq->avail < 8)
            break;

        next = m->m_nextpkt;
        m->m_nextpkt = NULL;

        if (next || buf_ring_peek(br))
            coalescing = 1;

        rc = get_pkt_sgl(txq, &m, &sgl, coalescing);
        if (rc != 0) {
            if (rc == ENOMEM) {

                /* Short of resources, suspend tx */

                m->m_nextpkt = next;
                break;
            }

            /*
             * Unrecoverable error for this packet, throw it away
             * and move on to the next. get_pkt_sgl may already
             * have freed m (it will be NULL in that case and the
             * m_freem here is still safe).
             */

            m_freem(m);
            continue;
        }

        if (coalescing &&
            add_to_txpkts(pi, txq, &txpkts, m, &sgl) == 0) {

            /* Successfully absorbed into txpkts */

            write_ulp_cpl_sgl(pi, txq, &txpkts, m, &sgl);
            goto doorbell;
        }

        /*
         * We weren't coalescing to begin with, or the current frame could
         * not be coalesced (add_to_txpkts flushes txpkts if a frame
         * given to it can't be coalesced). Either way there should be
         * nothing in txpkts.
         */
        KASSERT(txpkts.npkt == 0,
            ("%s: txpkts not empty: %d", __func__, txpkts.npkt));

        /* We're sending out individual packets now */
        coalescing = 0;

        if (eq->avail < 8)
            reclaim_tx_descs(txq, 0, 8);
        rc = write_txpkt_wr(pi, txq, m, &sgl);
        if (rc != 0) {

            /* Short of hardware descriptors, suspend tx */

            /*
             * This is an unlikely but expensive failure. We've
             * done all the hard work (DMA mappings etc.) and now we
             * can't send out the packet. What's worse, we have to
             * spend even more time freeing up everything in sgl.
             */
            txq->no_desc++;
            free_pkt_sgl(txq, &sgl);

            m->m_nextpkt = next;
            break;
        }

        ETHER_BPF_MTAP(ifp, m);
        if (sgl.nsegs == 0)
            m_freem(m);
doorbell:
        if (eq->pending >= 64)
            ring_eq_db(sc, eq);

        can_reclaim = reclaimable(eq);
        if (can_reclaim >= 32)
            reclaim_tx_descs(txq, can_reclaim, 64);
    }

    if (txpkts.npkt > 0)
        write_txpkts_wr(txq, &txpkts);

    /*
     * m not NULL means there was an error but we haven't thrown it away.
     * This can happen when we're short of tx descriptors (no_desc) or maybe
     * even DMA maps (no_dmamap). Either way, a credit flush and reclaim
     * will get things going again.
     */
    if (m && !(eq->flags & EQ_CRFLUSHED)) {
        struct tx_sdesc *txsd = &txq->sdesc[eq->pidx];

        /*
         * If EQ_CRFLUSHED is not set then we know we have at least one
         * available descriptor because any WR that reduces eq->avail to
         * 0 also sets EQ_CRFLUSHED.
         */
        KASSERT(eq->avail > 0, ("%s: no space for eqflush.", __func__));

        txsd->desc_used = 1;
        txsd->credits = 0;
        write_eqflush_wr(eq);
    }
    txq->m = m;

    if (eq->pending)
        ring_eq_db(sc, eq);

    reclaim_tx_descs(txq, 0, 128);

    if (eq->flags & EQ_STALLED && callout_pending(&eq->tx_callout) == 0)
        callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);

    return (0);
}

void
t4_update_fl_bufsize(struct ifnet *ifp)
{
    struct port_info *pi = ifp->if_softc;
    struct sge_rxq *rxq;
    struct sge_fl *fl;
    int i, bufsize;

    /* large enough for a frame even when VLAN extraction is disabled */
    bufsize = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ifp->if_mtu;
    bufsize = roundup(bufsize + FL_PKTSHIFT, fl_pad);
    for_each_rxq(pi, i, rxq) {
        fl = &rxq->fl;

        FL_LOCK(fl);
        set_fl_tag_idx(fl, bufsize);
        FL_UNLOCK(fl);
    }
}

int
can_resume_tx(struct sge_eq *eq)
{
    return (reclaimable(eq) >= tx_resume_threshold(eq));
}

static inline void
init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx,
    int qsize, int esize, char *name)
{
    KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS,
        ("%s: bad tmr_idx %d", __func__, tmr_idx));
    KASSERT(pktc_idx < SGE_NCOUNTERS,    /* -ve is ok, means don't use */
        ("%s: bad pktc_idx %d", __func__, pktc_idx));

    iq->flags = 0;
    iq->adapter = sc;
    iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx);
    iq->intr_pktc_idx = SGE_NCOUNTERS - 1;
    if (pktc_idx >= 0) {
        iq->intr_params |= F_QINTR_CNT_EN;
        iq->intr_pktc_idx = pktc_idx;
    }
    iq->qsize = roundup(qsize, 16);    /* See FW_IQ_CMD/iqsize */
    iq->esize = max(esize, 16);        /* See FW_IQ_CMD/iqesize */
    strlcpy(iq->lockname, name, sizeof(iq->lockname));
}

static inline void
init_fl(struct sge_fl *fl, int qsize, int bufsize, char *name)
{
    fl->qsize = qsize;
    strlcpy(fl->lockname, name, sizeof(fl->lockname));
    set_fl_tag_idx(fl, bufsize);
}

static inline void
init_eq(struct sge_eq *eq, int eqtype, int qsize, uint8_t tx_chan,
    uint16_t iqid, char *name)
{
    KASSERT(tx_chan < NCHAN, ("%s: bad tx channel %d", __func__, tx_chan));
    KASSERT(eqtype <= EQ_TYPEMASK, ("%s: bad qtype %d", __func__, eqtype));

    eq->flags = eqtype & EQ_TYPEMASK;
    eq->tx_chan = tx_chan;
    eq->iqid = iqid;
    eq->qsize = qsize;
    strlcpy(eq->lockname, name, sizeof(eq->lockname));

    TASK_INIT(&eq->tx_task, 0, t4_tx_task, eq);
    callout_init(&eq->tx_callout, CALLOUT_MPSAFE);
}
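/*
 * Allocate a DMA-able, zeroed ring of the given length as one contiguous
 * segment and hand back its tag, map, bus address, and kernel VA.
 */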
static int
alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag,
    bus_dmamap_t *map, bus_addr_t *pa, void **va)
{
    int rc;

    rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR,
        BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag);
    if (rc != 0) {
        device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc);
        goto done;
    }

    rc = bus_dmamem_alloc(*tag, va,
        BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map);
    if (rc != 0) {
        device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc);
        goto done;
    }

    rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0);
    if (rc != 0) {
        device_printf(sc->dev, "cannot load DMA map: %d\n", rc);
        goto done;
    }
done:
    if (rc)
        free_ring(sc, *tag, *map, *pa, *va);

    return (rc);
}

static int
free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map,
    bus_addr_t pa, void *va)
{
    if (pa)
        bus_dmamap_unload(tag, map);
    if (va)
        bus_dmamem_free(tag, va, map);
    if (tag)
        bus_dma_tag_destroy(tag);

    return (0);
}

/*
 * Allocates the ring for an ingress queue and an optional freelist. If the
 * freelist is specified it will be allocated and then associated with the
 * ingress queue.
 *
 * Returns errno on failure. Resources allocated up to that point may still be
 * allocated. Caller is responsible for cleanup in case this function fails.
 *
 * If the ingress queue will take interrupts directly (iq->flags & IQ_INTR) then
 * the intr_idx specifies the vector, starting from 0. Otherwise it specifies
 * the abs_id of the ingress queue to which its interrupts should be forwarded.
 */
static int
alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
    int intr_idx, int cong)
{
    int rc, i, cntxt_id;
    size_t len;
    struct fw_iq_cmd c;
    struct adapter *sc = iq->adapter;
    __be32 v = 0;

    len = iq->qsize * iq->esize;
    rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba,
        (void **)&iq->desc);
    if (rc != 0)
        return (rc);

    bzero(&c, sizeof(c));
    c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST |
        F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) |
        V_FW_IQ_CMD_VFN(0));

    c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART |
        FW_LEN16(c));

    /* Special handling for firmware event queue */
    if (iq == &sc->sge.fwq)
        v |= F_FW_IQ_CMD_IQASYNCH;

    if (iq->flags & IQ_INTR) {
        KASSERT(intr_idx < sc->intr_count,
            ("%s: invalid direct intr_idx %d", __func__, intr_idx));
    } else
        v |= F_FW_IQ_CMD_IQANDST;
    v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx);

    c.type_to_iqandstindex = htobe32(v |
        V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
        V_FW_IQ_CMD_VIID(pi->viid) |
        V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT));
    c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
        F_FW_IQ_CMD_IQGTSMODE |
        V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) |
        V_FW_IQ_CMD_IQESIZE(ilog2(iq->esize) - 4));
    c.iqsize = htobe16(iq->qsize);
    c.iqaddr = htobe64(iq->ba);
    if (cong >= 0)
        c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN);

    if (fl) {
        mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF);

        for (i = 0; i < FL_BUF_SIZES; i++) {

            /*
             * A freelist buffer must be 16 byte aligned as the SGE
             * uses the low 4 bits of the bus addr to figure out the
             * buffer size.
             */
            rc = bus_dma_tag_create(sc->dmat, 16, 0,
                BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
                FL_BUF_SIZE(i), 1, FL_BUF_SIZE(i), BUS_DMA_ALLOCNOW,
                NULL, NULL, &fl->tag[i]);
            if (rc != 0) {
                device_printf(sc->dev,
                    "failed to create fl DMA tag[%d]: %d\n",
                    i, rc);
                return (rc);
            }
        }
        len = fl->qsize * RX_FL_ESIZE;
        rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map,
            &fl->ba, (void **)&fl->desc);
        if (rc)
            return (rc);
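        /*
         * fl->cap is in buffers: the tail of the ring holds the status
         * page (spg_len bytes), and each remaining RX_FL_ESIZE descriptor
         * holds eight buffer addresses, hence the * 8.
         */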
        /* Allocate space for one software descriptor per buffer. */
        fl->cap = (fl->qsize - spg_len / RX_FL_ESIZE) * 8;
        FL_LOCK(fl);
        rc = alloc_fl_sdesc(fl);
        FL_UNLOCK(fl);
        if (rc != 0) {
            device_printf(sc->dev,
                "failed to setup fl software descriptors: %d\n",
                rc);
            return (rc);
        }
        fl->needed = fl->cap;
        fl->lowat = roundup(sc->sge.fl_starve_threshold, 8);

        c.iqns_to_fl0congen |=
            htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
            F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO |
            F_FW_IQ_CMD_FL0PADEN);
        if (cong >= 0) {
            c.iqns_to_fl0congen |=
                htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) |
                F_FW_IQ_CMD_FL0CONGCIF |
                F_FW_IQ_CMD_FL0CONGEN);
        }
        c.fl0dcaen_to_fl0cidxfthresh =
            htobe16(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_64B) |
            V_FW_IQ_CMD_FL0FBMAX(X_FETCHBURSTMAX_512B));
        c.fl0size = htobe16(fl->qsize);
        c.fl0addr = htobe64(fl->ba);
    }

    rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
    if (rc != 0) {
        device_printf(sc->dev,
            "failed to create ingress queue: %d\n", rc);
        return (rc);
    }

    iq->cdesc = iq->desc;
    iq->cidx = 0;
    iq->gen = 1;
    iq->intr_next = iq->intr_params;
    iq->cntxt_id = be16toh(c.iqid);
    iq->abs_id = be16toh(c.physiqid);
    iq->flags |= IQ_ALLOCATED;

    cntxt_id = iq->cntxt_id - sc->sge.iq_start;
    if (cntxt_id >= sc->sge.niq) {
        panic("%s: iq->cntxt_id (%d) more than the max (%d)", __func__,
            cntxt_id, sc->sge.niq - 1);
    }
    sc->sge.iqmap[cntxt_id] = iq;

    if (fl) {
        fl->cntxt_id = be16toh(c.fl0id);
        fl->pidx = fl->cidx = 0;

        cntxt_id = fl->cntxt_id - sc->sge.eq_start;
        if (cntxt_id >= sc->sge.neq) {
            panic("%s: fl->cntxt_id (%d) more than the max (%d)",
                __func__, cntxt_id, sc->sge.neq - 1);
        }
        sc->sge.eqmap[cntxt_id] = (void *)fl;

        FL_LOCK(fl);
        /* Enough to make sure the SGE doesn't think it's starved */
        refill_fl(sc, fl, fl->lowat);
        FL_UNLOCK(fl);

        iq->flags |= IQ_HAS_FL;
    }

    /* Enable IQ interrupts */
    atomic_store_rel_int(&iq->state, IQS_IDLE);
    t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_SEINTARM(iq->intr_params) |
        V_INGRESSQID(iq->cntxt_id));

    return (0);
}

static int
free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl)
{
    int i, rc;
    struct adapter *sc = iq->adapter;
    device_t dev;

    if (sc == NULL)
        return (0);    /* nothing to do */

    dev = pi ? pi->dev : sc->dev;

    if (iq->flags & IQ_ALLOCATED) {
        rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0,
            FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id,
            fl ? fl->cntxt_id : 0xffff, 0xffff);
        if (rc != 0) {
            device_printf(dev,
                "failed to free queue %p: %d\n", iq, rc);
            return (rc);
        }
        iq->flags &= ~IQ_ALLOCATED;
    }

    free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc);

    bzero(iq, sizeof(*iq));

    if (fl) {
        free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba,
            fl->desc);

        if (fl->sdesc) {
            FL_LOCK(fl);
            free_fl_sdesc(fl);
            FL_UNLOCK(fl);
        }

        if (mtx_initialized(&fl->fl_lock))
            mtx_destroy(&fl->fl_lock);

        for (i = 0; i < FL_BUF_SIZES; i++) {
            if (fl->tag[i])
                bus_dma_tag_destroy(fl->tag[i]);
        }

        bzero(fl, sizeof(*fl));
    }

    return (0);
}
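/*
 * The firmware event queue always takes direct interrupts: vector 1 when
 * multiple vectors are available (vector 0 is left for the adapter-wide
 * error interrupt), vector 0 otherwise.
 */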
static int
alloc_fwq(struct adapter *sc)
{
    int rc, intr_idx;
    struct sge_iq *fwq = &sc->sge.fwq;
    char name[16];
    struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
    struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);

    snprintf(name, sizeof(name), "%s fwq", device_get_nameunit(sc->dev));
    init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE, name);
    fwq->flags |= IQ_INTR;    /* always */
    intr_idx = sc->intr_count > 1 ? 1 : 0;
    rc = alloc_iq_fl(sc->port[0], fwq, NULL, intr_idx, -1);
    if (rc != 0) {
        device_printf(sc->dev,
            "failed to create firmware event queue: %d\n", rc);
        return (rc);
    }

    oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "fwq", CTLFLAG_RD,
        NULL, "firmware event queue");
    children = SYSCTL_CHILDREN(oid);

    SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "abs_id",
        CTLTYPE_INT | CTLFLAG_RD, &fwq->abs_id, 0, sysctl_uint16, "I",
        "absolute id of the queue");
    SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cntxt_id",
        CTLTYPE_INT | CTLFLAG_RD, &fwq->cntxt_id, 0, sysctl_uint16, "I",
        "SGE context id of the queue");
    SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx",
        CTLTYPE_INT | CTLFLAG_RD, &fwq->cidx, 0, sysctl_uint16, "I",
        "consumer index");

    return (0);
}

static int
free_fwq(struct adapter *sc)
{
    return free_iq_fl(NULL, &sc->sge.fwq, NULL);
}

static int
alloc_mgmtq(struct adapter *sc)
{
    int rc;
    struct sge_wrq *mgmtq = &sc->sge.mgmtq;
    char name[16];
    struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
    struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);

    oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "mgmtq", CTLFLAG_RD,
        NULL, "management queue");

    snprintf(name, sizeof(name), "%s mgmtq", device_get_nameunit(sc->dev));
    init_eq(&mgmtq->eq, EQ_CTRL, CTRL_EQ_QSIZE, sc->port[0]->tx_chan,
        sc->sge.fwq.cntxt_id, name);
    rc = alloc_wrq(sc, NULL, mgmtq, oid);
    if (rc != 0) {
        device_printf(sc->dev,
            "failed to create management queue: %d\n", rc);
        return (rc);
    }

    return (0);
}

static int
free_mgmtq(struct adapter *sc)
{

    return free_wrq(sc, &sc->sge.mgmtq);
}
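/*
 * NIC rx queue: an iq/fl pair plus LRO state and sysctl nodes.  The
 * freelist was refilled to its low-water mark in alloc_iq_fl; it is
 * topped up further here.
 */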
static int
alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx,
    struct sysctl_oid *oid)
{
    int rc;
    struct sysctl_oid_list *children;
    char name[16];

    rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx, 1 << pi->tx_chan);
    if (rc != 0)
        return (rc);

    FL_LOCK(&rxq->fl);
    refill_fl(pi->adapter, &rxq->fl, rxq->fl.needed / 8);
    FL_UNLOCK(&rxq->fl);

#ifdef INET
    rc = tcp_lro_init(&rxq->lro);
    if (rc != 0)
        return (rc);
    rxq->lro.ifp = pi->ifp;    /* also indicates LRO init'ed */

    if (pi->ifp->if_capenable & IFCAP_LRO)
        rxq->iq.flags |= IQ_LRO_ENABLED;
#endif
    rxq->ifp = pi->ifp;

    children = SYSCTL_CHILDREN(oid);

    snprintf(name, sizeof(name), "%d", idx);
    oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
        NULL, "rx queue");
    children = SYSCTL_CHILDREN(oid);

    SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id",
        CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.abs_id, 0, sysctl_uint16, "I",
        "absolute id of the queue");
    SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
        CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cntxt_id, 0, sysctl_uint16, "I",
        "SGE context id of the queue");
    SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
        CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cidx, 0, sysctl_uint16, "I",
        "consumer index");
#ifdef INET
    SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD,
        &rxq->lro.lro_queued, 0, NULL);
    SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD,
        &rxq->lro.lro_flushed, 0, NULL);
#endif
    SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD,
        &rxq->rxcsum, "# of times hardware assisted with checksum");
    SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_extraction",
        CTLFLAG_RD, &rxq->vlan_extraction,
        "# of times hardware extracted 802.1Q tag");

    children = SYSCTL_CHILDREN(oid);
    oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "fl", CTLFLAG_RD,
        NULL, "freelist");
    children = SYSCTL_CHILDREN(oid);

    SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
        CTLTYPE_INT | CTLFLAG_RD, &rxq->fl.cntxt_id, 0, sysctl_uint16, "I",
        "SGE context id of the queue");
    SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cidx", CTLFLAG_RD,
        &rxq->fl.cidx, 0, "consumer index");
    SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "pidx", CTLFLAG_RD,
        &rxq->fl.pidx, 0, "producer index");

    return (rc);
}

static int
free_rxq(struct port_info *pi, struct sge_rxq *rxq)
{
    int rc;

#ifdef INET
    if (rxq->lro.ifp) {
        tcp_lro_free(&rxq->lro);
        rxq->lro.ifp = NULL;
    }
#endif

    rc = free_iq_fl(pi, &rxq->iq, &rxq->fl);
    if (rc == 0)
        bzero(rxq, sizeof(*rxq));

    return (rc);
}
#ifdef TCP_OFFLOAD
static int
alloc_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq,
    int intr_idx, int idx, struct sysctl_oid *oid)
{
    int rc;
    struct sysctl_oid_list *children;
    char name[16];

    rc = alloc_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl, intr_idx,
        1 << pi->tx_chan);
    if (rc != 0)
        return (rc);

    children = SYSCTL_CHILDREN(oid);

    snprintf(name, sizeof(name), "%d", idx);
    oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
        NULL, "rx queue");
    children = SYSCTL_CHILDREN(oid);

    SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id",
        CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.abs_id, 0, sysctl_uint16,
        "I", "absolute id of the queue");
    SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
        CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cntxt_id, 0, sysctl_uint16,
        "I", "SGE context id of the queue");
    SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
        CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cidx, 0, sysctl_uint16, "I",
        "consumer index");

    children = SYSCTL_CHILDREN(oid);
    oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "fl", CTLFLAG_RD,
        NULL, "freelist");
    children = SYSCTL_CHILDREN(oid);

    SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
        CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->fl.cntxt_id, 0, sysctl_uint16,
        "I", "SGE context id of the queue");
    SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cidx", CTLFLAG_RD,
        &ofld_rxq->fl.cidx, 0, "consumer index");
    SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "pidx", CTLFLAG_RD,
        &ofld_rxq->fl.pidx, 0, "producer index");

    return (rc);
}

static int
free_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq)
{
    int rc;

    rc = free_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl);
    if (rc == 0)
        bzero(ofld_rxq, sizeof(*ofld_rxq));

    return (rc);
}
#endif
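/*
 * The three firmware egress-queue commands below (CTRL, ETH, OFLD) differ
 * mainly in command type and the virtual interface they bind to; each
 * creates the hardware EQ, saves its context id, and registers the queue
 * in eqmap.
 */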

static int
eth_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
{
	int rc, cntxt_id;
	struct fw_eq_eth_cmd c;

	bzero(&c, sizeof(c));

	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) |
	    V_FW_EQ_ETH_CMD_VFN(0));
	c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC |
	    F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c));
	c.viid_pkd = htobe32(V_FW_EQ_ETH_CMD_VIID(pi->viid));
	c.fetchszm_to_iqid =
	    htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
		V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO |
		V_FW_EQ_ETH_CMD_IQID(eq->iqid));
	c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
	    V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
	    V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
	    V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize));
	c.eqaddr = htobe64(eq->ba);

	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
	if (rc != 0) {
		device_printf(pi->dev,
		    "failed to create Ethernet egress queue: %d\n", rc);
		return (rc);
	}
	eq->flags |= EQ_ALLOCATED;

	eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd));
	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
	if (cntxt_id >= sc->sge.neq)
		panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
		    cntxt_id, sc->sge.neq - 1);
	sc->sge.eqmap[cntxt_id] = eq;

	return (rc);
}

#ifdef TCP_OFFLOAD
static int
ofld_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
{
	int rc, cntxt_id;
	struct fw_eq_ofld_cmd c;

	bzero(&c, sizeof(c));

	c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) |
	    V_FW_EQ_OFLD_CMD_VFN(0));
	c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC |
	    F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c));
	c.fetchszm_to_iqid =
	    htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
		V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) |
		F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid));
	c.dcaen_to_eqsize =
	    htobe32(V_FW_EQ_OFLD_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
		V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
		V_FW_EQ_OFLD_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
		V_FW_EQ_OFLD_CMD_EQSIZE(eq->qsize));
	c.eqaddr = htobe64(eq->ba);

	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
	if (rc != 0) {
		device_printf(pi->dev,
		    "failed to create egress queue for TCP offload: %d\n", rc);
		return (rc);
	}
	eq->flags |= EQ_ALLOCATED;

	eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd));
	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
	if (cntxt_id >= sc->sge.neq)
		panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
		    cntxt_id, sc->sge.neq - 1);
	sc->sge.eqmap[cntxt_id] = eq;

	return (rc);
}
#endif

static int
alloc_eq(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
{
	int rc;
	size_t len;

	mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF);

	len = eq->qsize * EQ_ESIZE;
	rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map,
	    &eq->ba, (void **)&eq->desc);
	if (rc)
		return (rc);

	eq->cap = eq->qsize - spg_len / EQ_ESIZE;
	eq->spg = (void *)&eq->desc[eq->cap];
	eq->avail = eq->cap - 1;	/* one less to avoid cidx = pidx */
	eq->pidx = eq->cidx = 0;

	switch (eq->flags & EQ_TYPEMASK) {
	case EQ_CTRL:
		rc = ctrl_eq_alloc(sc, eq);
		break;

	case EQ_ETH:
		rc = eth_eq_alloc(sc, pi, eq);
		break;

#ifdef TCP_OFFLOAD
	case EQ_OFLD:
		rc = ofld_eq_alloc(sc, pi, eq);
		break;
#endif

	default:
		panic("%s: invalid eq type %d.", __func__,
		    eq->flags & EQ_TYPEMASK);
	}
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to allocate egress queue (%d): %d\n",
		    eq->flags & EQ_TYPEMASK, rc);
	}

	eq->tx_callout.c_cpu = eq->cntxt_id % mp_ncpus;

	return (rc);
}
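
/*
 * Worked example of the sizing above, assuming the defaults in this file
 * (spg_len = 64, 64-byte egress descriptors): the status page consumes one
 * descriptor's worth of the ring, and one more slot is sacrificed so that
 * a full ring (pidx == cidx) remains distinguishable from an empty one.
 *
 *	qsize = 1024
 *	cap   = 1024 - 64 / 64 = 1023	usable descriptors
 *	avail = 1023 - 1       = 1022	max outstanding at any time
 */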

static int
free_eq(struct adapter *sc, struct sge_eq *eq)
{
	int rc;

	if (eq->flags & EQ_ALLOCATED) {
		switch (eq->flags & EQ_TYPEMASK) {
		case EQ_CTRL:
			rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0,
			    eq->cntxt_id);
			break;

		case EQ_ETH:
			rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0,
			    eq->cntxt_id);
			break;

#ifdef TCP_OFFLOAD
		case EQ_OFLD:
			rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0,
			    eq->cntxt_id);
			break;
#endif

		default:
			panic("%s: invalid eq type %d.", __func__,
			    eq->flags & EQ_TYPEMASK);
		}
		if (rc != 0) {
			device_printf(sc->dev,
			    "failed to free egress queue (%d): %d\n",
			    eq->flags & EQ_TYPEMASK, rc);
			return (rc);
		}
		eq->flags &= ~EQ_ALLOCATED;
	}

	free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc);

	if (mtx_initialized(&eq->eq_lock))
		mtx_destroy(&eq->eq_lock);

	bzero(eq, sizeof(*eq));
	return (0);
}

static int
alloc_wrq(struct adapter *sc, struct port_info *pi, struct sge_wrq *wrq,
    struct sysctl_oid *oid)
{
	int rc;
	struct sysctl_ctx_list *ctx = pi ? &pi->ctx : &sc->ctx;
	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);

	rc = alloc_eq(sc, pi, &wrq->eq);
	if (rc)
		return (rc);

	wrq->adapter = sc;
	STAILQ_INIT(&wrq->wr_list);

	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
	    &wrq->eq.cntxt_id, 0, "SGE context id of the queue");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx",
	    CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.cidx, 0, sysctl_uint16, "I",
	    "consumer index");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pidx",
	    CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.pidx, 0, sysctl_uint16, "I",
	    "producer index");
	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs", CTLFLAG_RD,
	    &wrq->tx_wrs, "# of work requests");
	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
	    &wrq->no_desc, 0,
	    "# of times queue ran out of hardware descriptors");
	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD,
	    &wrq->eq.unstalled, 0, "# of times queue recovered after stall");

	return (rc);
}

static int
free_wrq(struct adapter *sc, struct sge_wrq *wrq)
{
	int rc;

	rc = free_eq(sc, &wrq->eq);
	if (rc)
		return (rc);

	bzero(wrq, sizeof(*wrq));
	return (0);
}

static int
alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx,
    struct sysctl_oid *oid)
{
	int rc;
	struct adapter *sc = pi->adapter;
	struct sge_eq *eq = &txq->eq;
	char name[16];
	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);

	rc = alloc_eq(sc, pi, eq);
	if (rc)
		return (rc);

	txq->ifp = pi->ifp;

	txq->sdesc = malloc(eq->cap * sizeof(struct tx_sdesc), M_CXGBE,
	    M_ZERO | M_WAITOK);
	txq->br = buf_ring_alloc(eq->qsize, M_CXGBE, M_WAITOK, &eq->eq_lock);

	rc = bus_dma_tag_create(sc->dmat, 1, 0, BUS_SPACE_MAXADDR,
	    BUS_SPACE_MAXADDR, NULL, NULL, 64 * 1024, TX_SGL_SEGS,
	    BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &txq->tx_tag);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to create tx DMA tag: %d\n", rc);
		return (rc);
	}

	/*
	 * We can stuff ~10 frames in an 8-descriptor txpkts WR (8 is the SGE
	 * limit for any WR).  txq->no_dmamap events shouldn't occur if maps
	 * is sized for the worst case.
	 */
	rc = t4_alloc_tx_maps(&txq->txmaps, txq->tx_tag, eq->qsize * 10 / 8,
	    M_WAITOK);
	if (rc != 0) {
		device_printf(sc->dev, "failed to setup tx DMA maps: %d\n", rc);
		return (rc);
	}
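
	/*
	 * Worked example of the map count above: a WR is limited to 8
	 * descriptors, and per the comment roughly 10 frames fit in a full
	 * 8-descriptor txpkts WR, so a ring of qsize descriptors can have at
	 * most ~qsize * 10 / 8 frames in flight.  Each mapped frame holds
	 * exactly one DMA map (see free_pkt_sgl below), so e.g. qsize = 1024
	 * gives 1280 maps.
	 */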

	snprintf(name, sizeof(name), "%d", idx);
	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
	    NULL, "tx queue");
	children = SYSCTL_CHILDREN(oid);

	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
	    &eq->cntxt_id, 0, "SGE context id of the queue");
	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
	    CTLTYPE_INT | CTLFLAG_RD, &eq->cidx, 0, sysctl_uint16, "I",
	    "consumer index");
	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "pidx",
	    CTLTYPE_INT | CTLFLAG_RD, &eq->pidx, 0, sysctl_uint16, "I",
	    "producer index");

	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD,
	    &txq->txcsum, "# of times hardware assisted with checksum");
	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_insertion",
	    CTLFLAG_RD, &txq->vlan_insertion,
	    "# of times hardware inserted 802.1Q tag");
	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD,
	    &txq->tso_wrs, "# of IPv4 TSO work requests");
	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD,
	    &txq->imm_wrs, "# of work requests with immediate data");
	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD,
	    &txq->sgl_wrs, "# of work requests with direct SGL");
	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD,
	    &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)");
	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_wrs", CTLFLAG_RD,
	    &txq->txpkts_wrs, "# of txpkts work requests (multiple pkts/WR)");
	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_pkts", CTLFLAG_RD,
	    &txq->txpkts_pkts, "# of frames tx'd using txpkts work requests");

	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_dmamap", CTLFLAG_RD,
	    &txq->no_dmamap, 0, "# of times txq ran out of DMA maps");
	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
	    &txq->no_desc, 0, "# of times txq ran out of hardware descriptors");
	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "egr_update", CTLFLAG_RD,
	    &eq->egr_update, 0, "egress update notifications from the SGE");
	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD,
	    &eq->unstalled, 0, "# of times txq recovered after stall");

	return (rc);
}

static int
free_txq(struct port_info *pi, struct sge_txq *txq)
{
	int rc;
	struct adapter *sc = pi->adapter;
	struct sge_eq *eq = &txq->eq;

	rc = free_eq(sc, eq);
	if (rc)
		return (rc);

	free(txq->sdesc, M_CXGBE);

	if (txq->txmaps.maps)
		t4_free_tx_maps(&txq->txmaps, txq->tx_tag);

	buf_ring_free(txq->br, M_CXGBE);

	if (txq->tx_tag)
		bus_dma_tag_destroy(txq->tx_tag);

	bzero(txq, sizeof(*txq));
	return (0);
}

static void
oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	bus_addr_t *ba = arg;

	KASSERT(nseg == 1,
	    ("%s meant for single segment mappings only.", __func__));

	*ba = error ? 0 : segs->ds_addr;
}

static inline bool
is_new_response(const struct sge_iq *iq, struct rsp_ctrl **ctrl)
{
	*ctrl = (void *)((uintptr_t)iq->cdesc +
	    (iq->esize - sizeof(struct rsp_ctrl)));

	return (((*ctrl)->u.type_gen >> S_RSPD_GEN) == iq->gen);
}

static inline void
iq_next(struct sge_iq *iq)
{
	iq->cdesc = (void *)((uintptr_t)iq->cdesc + iq->esize);
	if (__predict_false(++iq->cidx == iq->qsize - 1)) {
		iq->cidx = 0;
		iq->gen ^= 1;
		iq->cdesc = iq->desc;
	}
}

#define FL_HW_IDX(x) ((x) >> 3)
static inline void
ring_fl_db(struct adapter *sc, struct sge_fl *fl)
{
	int ndesc = fl->pending / 8;

	if (FL_HW_IDX(fl->pidx) == FL_HW_IDX(fl->cidx))
		ndesc--;	/* hold back one credit */

	if (ndesc <= 0)
		return;		/* nothing to do */

	wmb();

	t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), F_DBPRIO |
	    V_QID(fl->cntxt_id) | V_PIDX(ndesc));
	fl->pending -= ndesc * 8;
}
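
/*
 * Worked example of the doorbell arithmetic above: freelist credits are
 * posted in units of 8 descriptors.  With fl->pending = 27, ndesc is 3 and
 * the write posts 24 descriptors, leaving pending = 3.  If the hardware's
 * cidx is in the same group of 8 as pidx (equal FL_HW_IDX), one credit is
 * held back (16 descriptors posted, pending = 11) so the hardware never
 * sees pidx wrap around onto cidx.
 */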

/*
 * Fill up the freelist by up to nbufs and maybe ring its doorbell.
 *
 * Returns non-zero to indicate that it should be added to the list of starving
 * freelists.
 */
static int
refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs)
{
	__be64 *d = &fl->desc[fl->pidx];
	struct fl_sdesc *sd = &fl->sdesc[fl->pidx];
	bus_dma_tag_t tag;
	bus_addr_t pa;
	caddr_t cl;
	int rc;

	FL_LOCK_ASSERT_OWNED(fl);

	if (nbufs > fl->needed)
		nbufs = fl->needed;

	while (nbufs--) {

		if (sd->cl != NULL) {

			/*
			 * This happens when a frame small enough to fit
			 * entirely in an mbuf was received in cl last time.
			 * We held on to cl and can reuse it now.  Note that
			 * we reuse a cluster of the old size if fl->tag_idx is
			 * no longer the same as sd->tag_idx.
			 */

			KASSERT(*d == sd->ba_tag,
			    ("%s: recycling problem at pidx %d",
			    __func__, fl->pidx));

			d++;
			goto recycled;
		}

		if (fl->tag_idx != sd->tag_idx) {
			bus_dmamap_t map;
			bus_dma_tag_t newtag = fl->tag[fl->tag_idx];
			bus_dma_tag_t oldtag = fl->tag[sd->tag_idx];

			/*
			 * An MTU change can get us here.  Discard the old map
			 * which was created with the old tag, but only if
			 * we're able to get a new one.
			 */
			rc = bus_dmamap_create(newtag, 0, &map);
			if (rc == 0) {
				bus_dmamap_destroy(oldtag, sd->map);
				sd->map = map;
				sd->tag_idx = fl->tag_idx;
			}
		}

		tag = fl->tag[sd->tag_idx];

		cl = m_cljget(NULL, M_NOWAIT, FL_BUF_SIZE(sd->tag_idx));
		if (cl == NULL)
			break;

		rc = bus_dmamap_load(tag, sd->map, cl, FL_BUF_SIZE(sd->tag_idx),
		    oneseg_dma_callback, &pa, 0);
		if (rc != 0 || pa == 0) {
			fl->dmamap_failed++;
			uma_zfree(FL_BUF_ZONE(sd->tag_idx), cl);
			break;
		}

		sd->cl = cl;
		*d++ = htobe64(pa | sd->tag_idx);

#ifdef INVARIANTS
		sd->ba_tag = htobe64(pa | sd->tag_idx);
#endif

recycled:
		/* sd->m is never recycled, should always be NULL */
		KASSERT(sd->m == NULL, ("%s: stray mbuf", __func__));

		sd->m = m_gethdr(M_NOWAIT, MT_NOINIT);
		if (sd->m == NULL)
			break;

		fl->pending++;
		fl->needed--;
		sd++;
		if (++fl->pidx == fl->cap) {
			fl->pidx = 0;
			sd = fl->sdesc;
			d = fl->desc;
		}
	}

	if (fl->pending >= 8)
		ring_fl_db(sc, fl);

	return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING));
}

/*
 * Attempt to refill all starving freelists.
 */
static void
refill_sfl(void *arg)
{
	struct adapter *sc = arg;
	struct sge_fl *fl, *fl_temp;

	mtx_lock(&sc->sfl_lock);
	TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) {
		FL_LOCK(fl);
		refill_fl(sc, fl, 64);
		if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) {
			TAILQ_REMOVE(&sc->sfl, fl, link);
			fl->flags &= ~FL_STARVING;
		}
		FL_UNLOCK(fl);
	}

	if (!TAILQ_EMPTY(&sc->sfl))
		callout_schedule(&sc->sfl_callout, hz / 5);
	mtx_unlock(&sc->sfl_lock);
}
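
/*
 * The starvation handling above is the usual FreeBSD periodic-retry
 * pattern: add_fl_to_sfl() (near the end of this file) arms sc->sfl_callout
 * with callout_reset(), and refill_sfl() re-arms itself with
 * callout_schedule() for as long as any freelist is still starving.  A
 * minimal sketch of the same pattern with hypothetical names (softc,
 * do_work, work_done):
 */
#if 0
static void
retry_work(void *arg)
{
	struct softc *sc = arg;

	mtx_lock(&sc->work_lock);
	do_work(sc);
	if (!work_done(sc))
		callout_schedule(&sc->work_callout, hz / 5);	/* retry */
	mtx_unlock(&sc->work_lock);
}
#endif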

static int
alloc_fl_sdesc(struct sge_fl *fl)
{
	struct fl_sdesc *sd;
	bus_dma_tag_t tag;
	int i, rc;

	FL_LOCK_ASSERT_OWNED(fl);

	fl->sdesc = malloc(fl->cap * sizeof(struct fl_sdesc), M_CXGBE,
	    M_ZERO | M_WAITOK);

	tag = fl->tag[fl->tag_idx];
	sd = fl->sdesc;
	for (i = 0; i < fl->cap; i++, sd++) {

		sd->tag_idx = fl->tag_idx;
		rc = bus_dmamap_create(tag, 0, &sd->map);
		if (rc != 0)
			goto failed;
	}

	return (0);
failed:
	while (--i >= 0) {
		sd--;
		bus_dmamap_destroy(tag, sd->map);
		if (sd->m) {
			m_init(sd->m, NULL, 0, M_NOWAIT, MT_DATA, 0);
			m_free(sd->m);
			sd->m = NULL;
		}
	}
	KASSERT(sd == fl->sdesc, ("%s: EDOOFUS", __func__));

	free(fl->sdesc, M_CXGBE);
	fl->sdesc = NULL;

	return (rc);
}

static void
free_fl_sdesc(struct sge_fl *fl)
{
	struct fl_sdesc *sd;
	int i;

	FL_LOCK_ASSERT_OWNED(fl);

	sd = fl->sdesc;
	for (i = 0; i < fl->cap; i++, sd++) {

		if (sd->m) {
			m_init(sd->m, NULL, 0, M_NOWAIT, MT_DATA, 0);
			m_free(sd->m);
			sd->m = NULL;
		}

		if (sd->cl) {
			bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
			uma_zfree(FL_BUF_ZONE(sd->tag_idx), sd->cl);
			sd->cl = NULL;
		}

		bus_dmamap_destroy(fl->tag[sd->tag_idx], sd->map);
	}

	free(fl->sdesc, M_CXGBE);
	fl->sdesc = NULL;
}

int
t4_alloc_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag, int count,
    int flags)
{
	struct tx_map *txm;
	int i, rc;

	txmaps->map_total = txmaps->map_avail = count;
	txmaps->map_cidx = txmaps->map_pidx = 0;

	txmaps->maps = malloc(count * sizeof(struct tx_map), M_CXGBE,
	    M_ZERO | flags);

	txm = txmaps->maps;
	for (i = 0; i < count; i++, txm++) {
		rc = bus_dmamap_create(tx_tag, 0, &txm->map);
		if (rc != 0)
			goto failed;
	}

	return (0);
failed:
	while (--i >= 0) {
		txm--;
		bus_dmamap_destroy(tx_tag, txm->map);
	}
	KASSERT(txm == txmaps->maps, ("%s: EDOOFUS", __func__));

	free(txmaps->maps, M_CXGBE);
	txmaps->maps = NULL;

	return (rc);
}

void
t4_free_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag)
{
	struct tx_map *txm;
	int i;

	txm = txmaps->maps;
	for (i = 0; i < txmaps->map_total; i++, txm++) {

		if (txm->m) {
			bus_dmamap_unload(tx_tag, txm->map);
			m_freem(txm->m);
			txm->m = NULL;
		}

		bus_dmamap_destroy(tx_tag, txm->map);
	}

	free(txmaps->maps, M_CXGBE);
	txmaps->maps = NULL;
}

/*
 * We'll do immediate data tx for non-TSO, but only when not coalescing.  We're
 * willing to use up to 2 hardware descriptors which means a maximum of 96
 * bytes of immediate data.
 */
#define IMM_LEN ( \
      2 * EQ_ESIZE \
    - sizeof(struct fw_eth_tx_pkt_wr) \
    - sizeof(struct cpl_tx_pkt_core))
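
/*
 * Worked example of IMM_LEN: the firmware WR header and the CPL are 16
 * bytes each, and an egress descriptor (EQ_ESIZE) is 64 bytes, so
 *
 *	IMM_LEN = 2 * 64 - 16 - 16 = 96
 *
 * which is the 96 bytes of immediate data mentioned in the comment above.
 */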

/*
 * Returns non-zero on failure, no need to cleanup anything in that case.
 *
 * Note 1: We always try to defrag the mbuf if required and return EFBIG only
 * if the resulting chain still won't fit in a tx descriptor.
 *
 * Note 2: We'll pullup the mbuf chain if TSO is requested and the first mbuf
 * does not have the TCP header in it.
 */
static int
get_pkt_sgl(struct sge_txq *txq, struct mbuf **fp, struct sgl *sgl,
    int sgl_only)
{
	struct mbuf *m = *fp;
	struct tx_maps *txmaps;
	struct tx_map *txm;
	int rc, defragged = 0, n;

	TXQ_LOCK_ASSERT_OWNED(txq);

	if (m->m_pkthdr.tso_segsz)
		sgl_only = 1;	/* Do not allow immediate data with LSO */

start:	sgl->nsegs = 0;

	if (m->m_pkthdr.len <= IMM_LEN && !sgl_only)
		return (0);	/* nsegs = 0 tells caller to use imm. tx */

	txmaps = &txq->txmaps;
	if (txmaps->map_avail == 0) {
		txq->no_dmamap++;
		return (ENOMEM);
	}
	txm = &txmaps->maps[txmaps->map_pidx];

	if (m->m_pkthdr.tso_segsz && m->m_len < 50) {
		*fp = m_pullup(m, 50);
		m = *fp;
		if (m == NULL)
			return (ENOBUFS);
	}

	rc = bus_dmamap_load_mbuf_sg(txq->tx_tag, txm->map, m, sgl->seg,
	    &sgl->nsegs, BUS_DMA_NOWAIT);
	if (rc == EFBIG && defragged == 0) {
		m = m_defrag(m, M_DONTWAIT);
		if (m == NULL)
			return (EFBIG);

		defragged = 1;
		*fp = m;
		goto start;
	}
	if (rc != 0)
		return (rc);

	txm->m = m;
	txmaps->map_avail--;
	if (++txmaps->map_pidx == txmaps->map_total)
		txmaps->map_pidx = 0;

	KASSERT(sgl->nsegs > 0 && sgl->nsegs <= TX_SGL_SEGS,
	    ("%s: bad DMA mapping (%d segments)", __func__, sgl->nsegs));

	/*
	 * Store the # of flits required to hold this frame's SGL in nflits.
	 * An SGL has a (ULPTX header + len0, addr0) tuple optionally followed
	 * by multiple (len0 + len1, addr0, addr1) tuples.  If addr1 is not
	 * used then len1 must be set to 0.
	 */
	n = sgl->nsegs - 1;
	sgl->nflits = (3 * n) / 2 + (n & 1) + 2;

	return (0);
}
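
/*
 * Worked example of the flit count above: the first segment costs 2 flits
 * (ULPTX header + len0, then addr0), every further pair of segments costs
 * 3 flits (len_i|len_j, addr_i, addr_j), and a lone trailing segment costs
 * 2 (its unused len slot is written as 0).  With n = nsegs - 1:
 *
 *	nsegs = 1: n = 0 -> 0 + 0 + 2 = 2 flits
 *	nsegs = 2: n = 1 -> 1 + 1 + 2 = 4 flits
 *	nsegs = 3: n = 2 -> 3 + 0 + 2 = 5 flits
 *	nsegs = 4: n = 3 -> 4 + 1 + 2 = 7 flits
 */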

/*
 * Releases all the txq resources used up in the specified sgl.
 */
static int
free_pkt_sgl(struct sge_txq *txq, struct sgl *sgl)
{
	struct tx_maps *txmaps;
	struct tx_map *txm;

	TXQ_LOCK_ASSERT_OWNED(txq);

	if (sgl->nsegs == 0)
		return (0);	/* didn't use any map */

	txmaps = &txq->txmaps;

	/* 1 pkt uses exactly 1 map, back it out */

	txmaps->map_avail++;
	if (txmaps->map_pidx > 0)
		txmaps->map_pidx--;
	else
		txmaps->map_pidx = txmaps->map_total - 1;

	txm = &txmaps->maps[txmaps->map_pidx];
	bus_dmamap_unload(txq->tx_tag, txm->map);
	txm->m = NULL;

	return (0);
}

static int
write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, struct mbuf *m,
    struct sgl *sgl)
{
	struct sge_eq *eq = &txq->eq;
	struct fw_eth_tx_pkt_wr *wr;
	struct cpl_tx_pkt_core *cpl;
	uint32_t ctrl;		/* used in many unrelated places */
	uint64_t ctrl1;
	int nflits, ndesc, pktlen;
	struct tx_sdesc *txsd;
	caddr_t dst;

	TXQ_LOCK_ASSERT_OWNED(txq);

	pktlen = m->m_pkthdr.len;

	/*
	 * Do we have enough flits to send this frame out?
	 */
	ctrl = sizeof(struct cpl_tx_pkt_core);
	if (m->m_pkthdr.tso_segsz) {
		nflits = TXPKT_LSO_WR_HDR;
		ctrl += sizeof(struct cpl_tx_pkt_lso_core);
	} else
		nflits = TXPKT_WR_HDR;
	if (sgl->nsegs > 0)
		nflits += sgl->nflits;
	else {
		nflits += howmany(pktlen, 8);
		ctrl += pktlen;
	}
	ndesc = howmany(nflits, 8);
	if (ndesc > eq->avail)
		return (ENOMEM);

	/* Firmware work request header */
	wr = (void *)&eq->desc[eq->pidx];
	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
	    V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl));
	ctrl = V_FW_WR_LEN16(howmany(nflits, 2));
	if (eq->avail == ndesc) {
		if (!(eq->flags & EQ_CRFLUSHED)) {
			ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
			eq->flags |= EQ_CRFLUSHED;
		}
		eq->flags |= EQ_STALLED;
	}

	wr->equiq_to_len16 = htobe32(ctrl);
	wr->r3 = 0;

	if (m->m_pkthdr.tso_segsz) {
		struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
		struct ether_header *eh;
		struct ip *ip;
		struct tcphdr *tcp;

		ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
		    F_LSO_LAST_SLICE;

		eh = mtod(m, struct ether_header *);
		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
			ctrl |= V_LSO_ETHHDR_LEN(1);
			ip = (void *)((struct ether_vlan_header *)eh + 1);
		} else
			ip = (void *)(eh + 1);

		tcp = (void *)((uintptr_t)ip + ip->ip_hl * 4);
		ctrl |= V_LSO_IPHDR_LEN(ip->ip_hl) |
		    V_LSO_TCPHDR_LEN(tcp->th_off);

		lso->lso_ctrl = htobe32(ctrl);
		lso->ipid_ofst = htobe16(0);
		lso->mss = htobe16(m->m_pkthdr.tso_segsz);
		lso->seqno_offset = htobe32(0);
		lso->len = htobe32(pktlen);

		cpl = (void *)(lso + 1);

		txq->tso_wrs++;
	} else
		cpl = (void *)(wr + 1);

	/* Checksum offload */
	ctrl1 = 0;
	if (!(m->m_pkthdr.csum_flags & CSUM_IP))
		ctrl1 |= F_TXPKT_IPCSUM_DIS;
	if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 |
	    CSUM_TCP_IPV6)))
		ctrl1 |= F_TXPKT_L4CSUM_DIS;
	if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP |
	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6))
		txq->txcsum++;	/* some hardware assistance provided */

	/* VLAN tag insertion */
	if (m->m_flags & M_VLANTAG) {
		ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
		txq->vlan_insertion++;
	}

	/* CPL header */
	cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
	cpl->pack = 0;
	cpl->len = htobe16(pktlen);
	cpl->ctrl1 = htobe64(ctrl1);

	/* Software descriptor */
	txsd = &txq->sdesc[eq->pidx];
	txsd->desc_used = ndesc;

	eq->pending += ndesc;
	eq->avail -= ndesc;
	eq->pidx += ndesc;
	if (eq->pidx >= eq->cap)
		eq->pidx -= eq->cap;

	/* SGL */
	dst = (void *)(cpl + 1);
	if (sgl->nsegs > 0) {
		txsd->credits = 1;
		txq->sgl_wrs++;
		write_sgl_to_txd(eq, sgl, &dst);
	} else {
		txsd->credits = 0;
		txq->imm_wrs++;
		for (; m; m = m->m_next) {
			copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);
#ifdef INVARIANTS
			pktlen -= m->m_len;
#endif
		}
#ifdef INVARIANTS
		KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen));
#endif
	}

	txq->txpkt_wrs++;
	return (0);
}
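
/*
 * Note the polarity of the checksum controls written above: the CPL
 * carries "disable" bits, so the hardware checksums unless told not to.
 * E.g. csum_flags = CSUM_IP | CSUM_TCP sets neither F_TXPKT_IPCSUM_DIS nor
 * F_TXPKT_L4CSUM_DIS (both checksums offloaded), while csum_flags = 0 sets
 * both bits and the frame goes out untouched.
 */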

/*
 * Returns 0 to indicate that m has been accepted into a coalesced tx work
 * request.  It has either been folded into txpkts or txpkts was flushed and m
 * has started a new coalesced work request (as the first frame in a fresh
 * txpkts).
 *
 * Returns non-zero to indicate a failure - caller is responsible for
 * transmitting m, if there was anything in txpkts it has been flushed.
 */
static int
add_to_txpkts(struct port_info *pi, struct sge_txq *txq, struct txpkts *txpkts,
    struct mbuf *m, struct sgl *sgl)
{
	struct sge_eq *eq = &txq->eq;
	int can_coalesce;
	struct tx_sdesc *txsd;
	int flits;

	TXQ_LOCK_ASSERT_OWNED(txq);

	KASSERT(sgl->nsegs, ("%s: can't coalesce imm data", __func__));

	if (txpkts->npkt > 0) {
		flits = TXPKTS_PKT_HDR + sgl->nflits;
		can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
		    txpkts->nflits + flits <= TX_WR_FLITS &&
		    txpkts->nflits + flits <= eq->avail * 8 &&
		    txpkts->plen + m->m_pkthdr.len < 65536;

		if (can_coalesce) {
			txpkts->npkt++;
			txpkts->nflits += flits;
			txpkts->plen += m->m_pkthdr.len;

			txsd = &txq->sdesc[eq->pidx];
			txsd->credits++;

			return (0);
		}

		/*
		 * Couldn't coalesce m into txpkts.  The first order of
		 * business is to send txpkts on its way.  Then we'll revisit
		 * m.
		 */
		write_txpkts_wr(txq, txpkts);
	}

	/*
	 * Check if we can start a new coalesced tx work request with m as
	 * the first packet in it.
	 */

	KASSERT(txpkts->npkt == 0, ("%s: txpkts not empty", __func__));

	flits = TXPKTS_WR_HDR + sgl->nflits;
	can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
	    flits <= eq->avail * 8 && flits <= TX_WR_FLITS;

	if (can_coalesce == 0)
		return (EINVAL);

	/*
	 * Start a fresh coalesced tx WR with m as the first frame in it.
	 */
	txpkts->npkt = 1;
	txpkts->nflits = flits;
	txpkts->flitp = &eq->desc[eq->pidx].flit[2];
	txpkts->plen = m->m_pkthdr.len;

	txsd = &txq->sdesc[eq->pidx];
	txsd->credits = 1;

	return (0);
}
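
/*
 * Worked example of the coalescing test above: a frame joins an open
 * txpkts WR only if it is not TSO, the WR stays within TX_WR_FLITS and the
 * descriptors still available (eq->avail * 8 flits), and the total payload
 * stays under 64K (wr->plen is a 16 bit field).  A frame with a 3-segment
 * SGL costs TXPKTS_PKT_HDR + 5 flits.  Once any test fails, the open WR is
 * flushed and the frame either starts a fresh WR or is sent by itself.
 */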

/*
 * Note that write_txpkts_wr can never run out of hardware descriptors (but
 * write_txpkt_wr can).  add_to_txpkts ensures that a frame is accepted for
 * coalescing only if sufficient hardware descriptors are available.
 */
static void
write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts)
{
	struct sge_eq *eq = &txq->eq;
	struct fw_eth_tx_pkts_wr *wr;
	struct tx_sdesc *txsd;
	uint32_t ctrl;
	int ndesc;

	TXQ_LOCK_ASSERT_OWNED(txq);

	ndesc = howmany(txpkts->nflits, 8);

	wr = (void *)&eq->desc[eq->pidx];
	wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR));
	ctrl = V_FW_WR_LEN16(howmany(txpkts->nflits, 2));
	if (eq->avail == ndesc) {
		if (!(eq->flags & EQ_CRFLUSHED)) {
			ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
			eq->flags |= EQ_CRFLUSHED;
		}
		eq->flags |= EQ_STALLED;
	}
	wr->equiq_to_len16 = htobe32(ctrl);
	wr->plen = htobe16(txpkts->plen);
	wr->npkt = txpkts->npkt;
	wr->r3 = wr->type = 0;

	/* Everything else already written */

	txsd = &txq->sdesc[eq->pidx];
	txsd->desc_used = ndesc;

	KASSERT(eq->avail >= ndesc, ("%s: out of descriptors", __func__));

	eq->pending += ndesc;
	eq->avail -= ndesc;
	eq->pidx += ndesc;
	if (eq->pidx >= eq->cap)
		eq->pidx -= eq->cap;

	txq->txpkts_pkts += txpkts->npkt;
	txq->txpkts_wrs++;
	txpkts->npkt = 0;	/* emptied */
}

static inline void
write_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq,
    struct txpkts *txpkts, struct mbuf *m, struct sgl *sgl)
{
	struct ulp_txpkt *ulpmc;
	struct ulptx_idata *ulpsc;
	struct cpl_tx_pkt_core *cpl;
	struct sge_eq *eq = &txq->eq;
	uintptr_t flitp, start, end;
	uint64_t ctrl;
	caddr_t dst;

	KASSERT(txpkts->npkt > 0, ("%s: txpkts is empty", __func__));

	start = (uintptr_t)eq->desc;
	end = (uintptr_t)eq->spg;

	/* Checksum offload */
	ctrl = 0;
	if (!(m->m_pkthdr.csum_flags & CSUM_IP))
		ctrl |= F_TXPKT_IPCSUM_DIS;
	if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)))
		ctrl |= F_TXPKT_L4CSUM_DIS;
	if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP))
		txq->txcsum++;	/* some hardware assistance provided */

	/* VLAN tag insertion */
	if (m->m_flags & M_VLANTAG) {
		ctrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
		txq->vlan_insertion++;
	}

	/*
	 * The previous packet's SGL must have ended at a 16 byte boundary (this
	 * is required by the firmware/hardware).  It follows that flitp cannot
	 * wrap around between the ULPTX master command and ULPTX subcommand (8
	 * bytes each), and that it can not wrap around in the middle of the
	 * cpl_tx_pkt_core either.
	 */
	flitp = (uintptr_t)txpkts->flitp;
	KASSERT((flitp & 0xf) == 0,
	    ("%s: last SGL did not end at 16 byte boundary: %p",
	    __func__, txpkts->flitp));

	/* ULP master command */
	ulpmc = (void *)flitp;
	ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0) |
	    V_ULP_TXPKT_FID(eq->iqid));
	ulpmc->len = htonl(howmany(sizeof(*ulpmc) + sizeof(*ulpsc) +
	    sizeof(*cpl) + 8 * sgl->nflits, 16));

	/* ULP subcommand */
	ulpsc = (void *)(ulpmc + 1);
	ulpsc->cmd_more = htobe32(V_ULPTX_CMD((u32)ULP_TX_SC_IMM) |
	    F_ULP_TX_SC_MORE);
	ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core));

	flitp += sizeof(*ulpmc) + sizeof(*ulpsc);
	if (flitp == end)
		flitp = start;

	/* CPL_TX_PKT */
	cpl = (void *)flitp;
	cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
	cpl->pack = 0;
	cpl->len = htobe16(m->m_pkthdr.len);
	cpl->ctrl1 = htobe64(ctrl);

	flitp += sizeof(*cpl);
	if (flitp == end)
		flitp = start;

	/* SGL for this frame */
	dst = (caddr_t)flitp;
	txpkts->nflits += write_sgl_to_txd(eq, sgl, &dst);
	txpkts->flitp = (void *)dst;

	KASSERT(((uintptr_t)dst & 0xf) == 0,
	    ("%s: SGL ends at %p (not a 16 byte boundary)", __func__, dst));
}

/*
 * If the SGL ends on an address that is not 16 byte aligned, this function
 * will add a 0 filled flit at the end.  It returns 1 in that case.
 */
static int
write_sgl_to_txd(struct sge_eq *eq, struct sgl *sgl, caddr_t *to)
{
	__be64 *flitp, *end;
	struct ulptx_sgl *usgl;
	bus_dma_segment_t *seg;
	int i, padded;

	KASSERT(sgl->nsegs > 0 && sgl->nflits > 0,
	    ("%s: bad SGL - nsegs=%d, nflits=%d",
	    __func__, sgl->nsegs, sgl->nflits));

	KASSERT(((uintptr_t)(*to) & 0xf) == 0,
	    ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to));

	flitp = (__be64 *)(*to);
	end = flitp + sgl->nflits;
	seg = &sgl->seg[0];
	usgl = (void *)flitp;

	/*
	 * We start at a 16 byte boundary somewhere inside the tx descriptor
	 * ring, so we're at least 16 bytes away from the status page.  There
	 * is no chance of a wrap around in the middle of usgl (which is 16
	 * bytes).
	 */

	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
	    V_ULPTX_NSGE(sgl->nsegs));
	usgl->len0 = htobe32(seg->ds_len);
	usgl->addr0 = htobe64(seg->ds_addr);
	seg++;

	if ((uintptr_t)end <= (uintptr_t)eq->spg) {

		/* Won't wrap around at all */

		for (i = 0; i < sgl->nsegs - 1; i++, seg++) {
			usgl->sge[i / 2].len[i & 1] = htobe32(seg->ds_len);
			usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ds_addr);
		}
		if (i & 1)
			usgl->sge[i / 2].len[1] = htobe32(0);
	} else {

		/* Will wrap somewhere in the rest of the SGL */

		/* 2 flits already written, write the rest flit by flit */
		flitp = (void *)(usgl + 1);
		for (i = 0; i < sgl->nflits - 2; i++) {
			if ((uintptr_t)flitp == (uintptr_t)eq->spg)
				flitp = (void *)eq->desc;
			*flitp++ = get_flit(seg, sgl->nsegs - 1, i);
		}
		end = flitp;
	}

	if ((uintptr_t)end & 0xf) {
		*(uint64_t *)end = 0;
		end++;
		padded = 1;
	} else
		padded = 0;

	if ((uintptr_t)end == (uintptr_t)eq->spg)
		*to = (void *)eq->desc;
	else
		*to = (void *)end;

	return (padded);
}
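
/*
 * Worked example of the padding above: flits are 8 bytes, so an SGL with
 * an odd flit count ends 8-byte but not 16-byte aligned.  E.g. nsegs = 3
 * gives nflits = 5; one zero flit is appended and the function returns 1.
 * write_ulp_cpl_sgl() adds that return value to txpkts->nflits so the next
 * frame's ULP header starts at the required 16 byte boundary.
 */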

static inline void
copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len)
{
	if (__predict_true((uintptr_t)(*to) + len <= (uintptr_t)eq->spg)) {
		bcopy(from, *to, len);
		(*to) += len;
	} else {
		int portion = (uintptr_t)eq->spg - (uintptr_t)(*to);

		bcopy(from, *to, portion);
		from += portion;
		portion = len - portion;	/* remaining */
		bcopy(from, (void *)eq->desc, portion);
		(*to) = (caddr_t)eq->desc + portion;
	}
}

static inline void
ring_eq_db(struct adapter *sc, struct sge_eq *eq)
{
	wmb();
	t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL),
	    V_QID(eq->cntxt_id) | V_PIDX(eq->pending));
	eq->pending = 0;
}

static inline int
reclaimable(struct sge_eq *eq)
{
	unsigned int cidx;

	cidx = eq->spg->cidx;	/* stable snapshot */
	cidx = be16toh(cidx);

	if (cidx >= eq->cidx)
		return (cidx - eq->cidx);
	else
		return (cidx + eq->cap - eq->cidx);
}
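
/*
 * Worked example of the wrap-around math above, using the cidx the
 * hardware last wrote to the status page: with cap = 1023, a hardware
 * cidx of 5 and a software cidx of 1020, the queue has consumed
 * 5 + 1023 - 1020 = 8 descriptors that can now be reclaimed.
 */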

/*
 * There are "can_reclaim" tx descriptors ready to be reclaimed.  Reclaim as
 * many as possible but stop when there are around "n" mbufs to free.
 *
 * The actual number reclaimed is provided as the return value.
 */
static int
reclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n)
{
	struct tx_sdesc *txsd;
	struct tx_maps *txmaps;
	struct tx_map *txm;
	unsigned int reclaimed, maps;
	struct sge_eq *eq = &txq->eq;

	TXQ_LOCK_ASSERT_OWNED(txq);

	if (can_reclaim == 0)
		can_reclaim = reclaimable(eq);

	maps = reclaimed = 0;
	while (can_reclaim && maps < n) {
		int ndesc;

		txsd = &txq->sdesc[eq->cidx];
		ndesc = txsd->desc_used;

		/* Firmware doesn't return "partial" credits. */
		KASSERT(can_reclaim >= ndesc,
		    ("%s: unexpected number of credits: %d, %d",
		    __func__, can_reclaim, ndesc));

		maps += txsd->credits;

		reclaimed += ndesc;
		can_reclaim -= ndesc;

		eq->cidx += ndesc;
		if (__predict_false(eq->cidx >= eq->cap))
			eq->cidx -= eq->cap;
	}

	txmaps = &txq->txmaps;
	txm = &txmaps->maps[txmaps->map_cidx];
	if (maps)
		prefetch(txm->m);

	eq->avail += reclaimed;
	KASSERT(eq->avail < eq->cap,	/* avail tops out at (cap - 1) */
	    ("%s: too many descriptors available", __func__));

	txmaps->map_avail += maps;
	KASSERT(txmaps->map_avail <= txmaps->map_total,
	    ("%s: too many maps available", __func__));

	while (maps--) {
		struct tx_map *next;

		next = txm + 1;
		if (__predict_false(txmaps->map_cidx + 1 == txmaps->map_total))
			next = txmaps->maps;
		prefetch(next->m);

		bus_dmamap_unload(txq->tx_tag, txm->map);
		m_freem(txm->m);
		txm->m = NULL;

		txm = next;
		if (__predict_false(++txmaps->map_cidx == txmaps->map_total))
			txmaps->map_cidx = 0;
	}

	return (reclaimed);
}

static void
write_eqflush_wr(struct sge_eq *eq)
{
	struct fw_eq_flush_wr *wr;

	EQ_LOCK_ASSERT_OWNED(eq);
	KASSERT(eq->avail > 0, ("%s: no descriptors left.", __func__));
	KASSERT(!(eq->flags & EQ_CRFLUSHED), ("%s: flushed already", __func__));

	wr = (void *)&eq->desc[eq->pidx];
	bzero(wr, sizeof(*wr));
	wr->opcode = FW_EQ_FLUSH_WR;
	wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(sizeof(*wr) / 16) |
	    F_FW_WR_EQUEQ | F_FW_WR_EQUIQ);

	eq->flags |= (EQ_CRFLUSHED | EQ_STALLED);
	eq->pending++;
	eq->avail--;
	if (++eq->pidx == eq->cap)
		eq->pidx = 0;
}

static __be64
get_flit(bus_dma_segment_t *sgl, int nsegs, int idx)
{
	int i = (idx / 3) * 2;

	switch (idx % 3) {
	case 0: {
		__be64 rc;

		rc = htobe32(sgl[i].ds_len);
		if (i + 1 < nsegs)
			rc |= (uint64_t)htobe32(sgl[i + 1].ds_len) << 32;

		return (rc);
	}
	case 1:
		return htobe64(sgl[i].ds_addr);
	case 2:
		return htobe64(sgl[i + 1].ds_addr);
	}

	return (0);
}
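
/*
 * Worked example of the flit stream get_flit() produces while the SGL
 * wraps, continuing the (len_i|len_j, addr_i, addr_j) pattern flit by
 * flit: idx 0 packs ds_len[0] and ds_len[1], idx 1 is ds_addr[0], idx 2 is
 * ds_addr[1], idx 3 packs ds_len[2] and ds_len[3], and so on.  A missing
 * trailing length (i + 1 == nsegs) is left as 0.
 */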

static void
set_fl_tag_idx(struct sge_fl *fl, int bufsize)
{
	int i;

	for (i = 0; i < FL_BUF_SIZES - 1; i++) {
		if (FL_BUF_SIZE(i) >= bufsize)
			break;
	}

	fl->tag_idx = i;
}

static void
add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl)
{
	mtx_lock(&sc->sfl_lock);
	FL_LOCK(fl);
	if ((fl->flags & FL_DOOMED) == 0) {
		fl->flags |= FL_STARVING;
		TAILQ_INSERT_TAIL(&sc->sfl, fl, link);
		callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc);
	}
	FL_UNLOCK(fl);
	mtx_unlock(&sc->sfl_lock);
}

static int
handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1);
	unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid));
	struct adapter *sc = iq->adapter;
	struct sge *s = &sc->sge;
	struct sge_eq *eq;

	KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
	    rss->opcode));

	eq = s->eqmap[qid - s->eq_start];
	EQ_LOCK(eq);
	KASSERT(eq->flags & EQ_CRFLUSHED,
	    ("%s: unsolicited egress update", __func__));
	eq->flags &= ~EQ_CRFLUSHED;
	eq->egr_update++;

	if (__predict_false(eq->flags & EQ_DOOMED))
		wakeup_one(eq);
	else if (eq->flags & EQ_STALLED && can_resume_tx(eq))
		taskqueue_enqueue(sc->tq[eq->tx_chan], &eq->tx_task);
	EQ_UNLOCK(eq);

	return (0);
}

static int
handle_fw_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	const struct cpl_fw6_msg *cpl = (const void *)(rss + 1);

	KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
	    rss->opcode));

	if (cpl->type == FW6_TYPE_CMD_RPL)
		t4_handle_fw_rpl(iq->adapter, cpl->data);

	return (0);
}

static int
sysctl_uint16(SYSCTL_HANDLER_ARGS)
{
	uint16_t *id = arg1;
	int i = *id;

	return sysctl_handle_int(oidp, &i, 0, req);
}