t4_sge.c revision 306664
1/*- 2 * Copyright (c) 2011 Chelsio Communications, Inc. 3 * All rights reserved. 4 * Written by: Navdeep Parhar <np@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD: stable/11/sys/dev/cxgbe/t4_sge.c 306664 2016-10-03 23:49:05Z jhb $"); 30 31#include "opt_inet.h" 32#include "opt_inet6.h" 33 34#include <sys/types.h> 35#include <sys/eventhandler.h> 36#include <sys/mbuf.h> 37#include <sys/socket.h> 38#include <sys/kernel.h> 39#include <sys/malloc.h> 40#include <sys/queue.h> 41#include <sys/sbuf.h> 42#include <sys/taskqueue.h> 43#include <sys/time.h> 44#include <sys/sglist.h> 45#include <sys/sysctl.h> 46#include <sys/smp.h> 47#include <sys/counter.h> 48#include <net/bpf.h> 49#include <net/ethernet.h> 50#include <net/if.h> 51#include <net/if_vlan_var.h> 52#include <netinet/in.h> 53#include <netinet/ip.h> 54#include <netinet/ip6.h> 55#include <netinet/tcp.h> 56#include <machine/in_cksum.h> 57#include <machine/md_var.h> 58#include <vm/vm.h> 59#include <vm/pmap.h> 60#ifdef DEV_NETMAP 61#include <machine/bus.h> 62#include <sys/selinfo.h> 63#include <net/if_var.h> 64#include <net/netmap.h> 65#include <dev/netmap/netmap_kern.h> 66#endif 67 68#include "common/common.h" 69#include "common/t4_regs.h" 70#include "common/t4_regs_values.h" 71#include "common/t4_msg.h" 72#include "t4_l2t.h" 73#include "t4_mp_ring.h" 74 75#ifdef T4_PKT_TIMESTAMP 76#define RX_COPY_THRESHOLD (MINCLSIZE - 8) 77#else 78#define RX_COPY_THRESHOLD MINCLSIZE 79#endif 80 81/* 82 * Ethernet frames are DMA'd at this byte offset into the freelist buffer. 83 * 0-7 are valid values. 84 */ 85int fl_pktshift = 2; 86TUNABLE_INT("hw.cxgbe.fl_pktshift", &fl_pktshift); 87 88/* 89 * Pad ethernet payload up to this boundary. 90 * -1: driver should figure out a good value. 91 * 0: disable padding. 92 * Any power of 2 from 32 to 4096 (both inclusive) is also a valid value. 93 */ 94int fl_pad = -1; 95TUNABLE_INT("hw.cxgbe.fl_pad", &fl_pad); 96 97/* 98 * Status page length. 99 * -1: driver should figure out a good value. 100 * 64 or 128 are the only other valid values. 101 */ 102int spg_len = -1; 103TUNABLE_INT("hw.cxgbe.spg_len", &spg_len); 104 105/* 106 * Congestion drops. 107 * -1: no congestion feedback (not recommended). 
108 * 0: backpressure the channel instead of dropping packets right away. 109 * 1: no backpressure, drop packets for the congested queue immediately. 110 */ 111static int cong_drop = 0; 112TUNABLE_INT("hw.cxgbe.cong_drop", &cong_drop); 113 114/* 115 * Deliver multiple frames in the same free list buffer if they fit. 116 * -1: let the driver decide whether to enable buffer packing or not. 117 * 0: disable buffer packing. 118 * 1: enable buffer packing. 119 */ 120static int buffer_packing = -1; 121TUNABLE_INT("hw.cxgbe.buffer_packing", &buffer_packing); 122 123/* 124 * Start next frame in a packed buffer at this boundary. 125 * -1: driver should figure out a good value. 126 * T4: driver will ignore this and use the same value as fl_pad above. 127 * T5: 16, or a power of 2 from 64 to 4096 (both inclusive) is a valid value. 128 */ 129static int fl_pack = -1; 130TUNABLE_INT("hw.cxgbe.fl_pack", &fl_pack); 131 132/* 133 * Allow the driver to create mbuf(s) in a cluster allocated for rx. 134 * 0: never; always allocate mbufs from the zone_mbuf UMA zone. 135 * 1: ok to create mbuf(s) within a cluster if there is room. 136 */ 137static int allow_mbufs_in_cluster = 1; 138TUNABLE_INT("hw.cxgbe.allow_mbufs_in_cluster", &allow_mbufs_in_cluster); 139 140/* 141 * Largest rx cluster size that the driver is allowed to allocate. 142 */ 143static int largest_rx_cluster = MJUM16BYTES; 144TUNABLE_INT("hw.cxgbe.largest_rx_cluster", &largest_rx_cluster); 145 146/* 147 * Size of cluster allocation that's most likely to succeed. The driver will 148 * fall back to this size if it fails to allocate clusters larger than this. 149 */ 150static int safest_rx_cluster = PAGE_SIZE; 151TUNABLE_INT("hw.cxgbe.safest_rx_cluster", &safest_rx_cluster); 152 153struct txpkts { 154 u_int wr_type; /* type 0 or type 1 */ 155 u_int npkt; /* # of packets in this work request */ 156 u_int plen; /* total payload (sum of all packets) */ 157 u_int len16; /* # of 16B pieces used by this work request */ 158}; 159 160/* A packet's SGL. 
This + m_pkthdr has all info needed for tx */ 161struct sgl { 162 struct sglist sg; 163 struct sglist_seg seg[TX_SGL_SEGS]; 164}; 165 166static int service_iq(struct sge_iq *, int); 167static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t); 168static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *); 169static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int); 170static inline void init_fl(struct adapter *, struct sge_fl *, int, int, char *); 171static inline void init_eq(struct adapter *, struct sge_eq *, int, int, uint8_t, 172 uint16_t, char *); 173static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *, 174 bus_addr_t *, void **); 175static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t, 176 void *); 177static int alloc_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *, 178 int, int); 179static int free_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *); 180static void add_fl_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *, 181 struct sge_fl *); 182static int alloc_fwq(struct adapter *); 183static int free_fwq(struct adapter *); 184static int alloc_mgmtq(struct adapter *); 185static int free_mgmtq(struct adapter *); 186static int alloc_rxq(struct vi_info *, struct sge_rxq *, int, int, 187 struct sysctl_oid *); 188static int free_rxq(struct vi_info *, struct sge_rxq *); 189#ifdef TCP_OFFLOAD 190static int alloc_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *, int, int, 191 struct sysctl_oid *); 192static int free_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *); 193#endif 194#ifdef DEV_NETMAP 195static int alloc_nm_rxq(struct vi_info *, struct sge_nm_rxq *, int, int, 196 struct sysctl_oid *); 197static int free_nm_rxq(struct vi_info *, struct sge_nm_rxq *); 198static int alloc_nm_txq(struct vi_info *, struct sge_nm_txq *, int, int, 199 struct sysctl_oid *); 200static int free_nm_txq(struct vi_info *, struct sge_nm_txq *); 201#endif 202static int ctrl_eq_alloc(struct adapter *, struct sge_eq *); 203static int eth_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *); 204#ifdef TCP_OFFLOAD 205static int ofld_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *); 206#endif 207static int alloc_eq(struct adapter *, struct vi_info *, struct sge_eq *); 208static int free_eq(struct adapter *, struct sge_eq *); 209static int alloc_wrq(struct adapter *, struct vi_info *, struct sge_wrq *, 210 struct sysctl_oid *); 211static int free_wrq(struct adapter *, struct sge_wrq *); 212static int alloc_txq(struct vi_info *, struct sge_txq *, int, 213 struct sysctl_oid *); 214static int free_txq(struct vi_info *, struct sge_txq *); 215static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int); 216static inline void ring_fl_db(struct adapter *, struct sge_fl *); 217static int refill_fl(struct adapter *, struct sge_fl *, int); 218static void refill_sfl(void *); 219static int alloc_fl_sdesc(struct sge_fl *); 220static void free_fl_sdesc(struct adapter *, struct sge_fl *); 221static void find_best_refill_source(struct adapter *, struct sge_fl *, int); 222static void find_safe_refill_source(struct adapter *, struct sge_fl *); 223static void add_fl_to_sfl(struct adapter *, struct sge_fl *); 224 225static inline void get_pkt_gl(struct mbuf *, struct sglist *); 226static inline u_int txpkt_len16(u_int, u_int); 227static inline u_int txpkt_vm_len16(u_int, u_int); 228static inline u_int txpkts0_len16(u_int); 229static inline u_int txpkts1_len16(void); 
230static u_int write_txpkt_wr(struct sge_txq *, struct fw_eth_tx_pkt_wr *, 231 struct mbuf *, u_int); 232static u_int write_txpkt_vm_wr(struct sge_txq *, struct fw_eth_tx_pkt_vm_wr *, 233 struct mbuf *, u_int); 234static int try_txpkts(struct mbuf *, struct mbuf *, struct txpkts *, u_int); 235static int add_to_txpkts(struct mbuf *, struct txpkts *, u_int); 236static u_int write_txpkts_wr(struct sge_txq *, struct fw_eth_tx_pkts_wr *, 237 struct mbuf *, const struct txpkts *, u_int); 238static void write_gl_to_txd(struct sge_txq *, struct mbuf *, caddr_t *, int); 239static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int); 240static inline void ring_eq_db(struct adapter *, struct sge_eq *, u_int); 241static inline uint16_t read_hw_cidx(struct sge_eq *); 242static inline u_int reclaimable_tx_desc(struct sge_eq *); 243static inline u_int total_available_tx_desc(struct sge_eq *); 244static u_int reclaim_tx_descs(struct sge_txq *, u_int); 245static void tx_reclaim(void *, int); 246static __be64 get_flit(struct sglist_seg *, int, int); 247static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *, 248 struct mbuf *); 249static int handle_fw_msg(struct sge_iq *, const struct rss_header *, 250 struct mbuf *); 251static int t4_handle_wrerr_rpl(struct adapter *, const __be64 *); 252static void wrq_tx_drain(void *, int); 253static void drain_wrq_wr_list(struct adapter *, struct sge_wrq *); 254 255static int sysctl_uint16(SYSCTL_HANDLER_ARGS); 256static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS); 257static int sysctl_tc(SYSCTL_HANDLER_ARGS); 258 259static counter_u64_t extfree_refs; 260static counter_u64_t extfree_rels; 261 262an_handler_t t4_an_handler; 263fw_msg_handler_t t4_fw_msg_handler[NUM_FW6_TYPES]; 264cpl_handler_t t4_cpl_handler[NUM_CPL_CMDS]; 265 266 267static int 268an_not_handled(struct sge_iq *iq, const struct rsp_ctrl *ctrl) 269{ 270 271#ifdef INVARIANTS 272 panic("%s: async notification on iq %p (ctrl %p)", __func__, iq, ctrl); 273#else 274 log(LOG_ERR, "%s: async notification on iq %p (ctrl %p)\n", 275 __func__, iq, ctrl); 276#endif 277 return (EDOOFUS); 278} 279 280int 281t4_register_an_handler(an_handler_t h) 282{ 283 uintptr_t *loc, new; 284 285 new = h ? (uintptr_t)h : (uintptr_t)an_not_handled; 286 loc = (uintptr_t *) &t4_an_handler; 287 atomic_store_rel_ptr(loc, new); 288 289 return (0); 290} 291 292static int 293fw_msg_not_handled(struct adapter *sc, const __be64 *rpl) 294{ 295 const struct cpl_fw6_msg *cpl = 296 __containerof(rpl, struct cpl_fw6_msg, data[0]); 297 298#ifdef INVARIANTS 299 panic("%s: fw_msg type %d", __func__, cpl->type); 300#else 301 log(LOG_ERR, "%s: fw_msg type %d\n", __func__, cpl->type); 302#endif 303 return (EDOOFUS); 304} 305 306int 307t4_register_fw_msg_handler(int type, fw_msg_handler_t h) 308{ 309 uintptr_t *loc, new; 310 311 if (type >= nitems(t4_fw_msg_handler)) 312 return (EINVAL); 313 314 /* 315 * These are dispatched by the handler for FW{4|6}_CPL_MSG using the CPL 316 * handler dispatch table. Reject any attempt to install a handler for 317 * this subtype. 318 */ 319 if (type == FW_TYPE_RSSCPL || type == FW6_TYPE_RSSCPL) 320 return (EINVAL); 321 322 new = h ? 
(uintptr_t)h : (uintptr_t)fw_msg_not_handled; 323 loc = (uintptr_t *) &t4_fw_msg_handler[type]; 324 atomic_store_rel_ptr(loc, new); 325 326 return (0); 327} 328 329static int 330cpl_not_handled(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 331{ 332 333#ifdef INVARIANTS 334 panic("%s: opcode 0x%02x on iq %p with payload %p", 335 __func__, rss->opcode, iq, m); 336#else 337 log(LOG_ERR, "%s: opcode 0x%02x on iq %p with payload %p\n", 338 __func__, rss->opcode, iq, m); 339 m_freem(m); 340#endif 341 return (EDOOFUS); 342} 343 344int 345t4_register_cpl_handler(int opcode, cpl_handler_t h) 346{ 347 uintptr_t *loc, new; 348 349 if (opcode >= nitems(t4_cpl_handler)) 350 return (EINVAL); 351 352 new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled; 353 loc = (uintptr_t *) &t4_cpl_handler[opcode]; 354 atomic_store_rel_ptr(loc, new); 355 356 return (0); 357} 358 359/* 360 * Called on MOD_LOAD. Validates and calculates the SGE tunables. 361 */ 362void 363t4_sge_modload(void) 364{ 365 int i; 366 367 if (fl_pktshift < 0 || fl_pktshift > 7) { 368 printf("Invalid hw.cxgbe.fl_pktshift value (%d)," 369 " using 2 instead.\n", fl_pktshift); 370 fl_pktshift = 2; 371 } 372 373 if (spg_len != 64 && spg_len != 128) { 374 int len; 375 376#if defined(__i386__) || defined(__amd64__) 377 len = cpu_clflush_line_size > 64 ? 128 : 64; 378#else 379 len = 64; 380#endif 381 if (spg_len != -1) { 382 printf("Invalid hw.cxgbe.spg_len value (%d)," 383 " using %d instead.\n", spg_len, len); 384 } 385 spg_len = len; 386 } 387 388 if (cong_drop < -1 || cong_drop > 1) { 389 printf("Invalid hw.cxgbe.cong_drop value (%d)," 390 " using 0 instead.\n", cong_drop); 391 cong_drop = 0; 392 } 393 394 extfree_refs = counter_u64_alloc(M_WAITOK); 395 extfree_rels = counter_u64_alloc(M_WAITOK); 396 counter_u64_zero(extfree_refs); 397 counter_u64_zero(extfree_rels); 398 399 t4_an_handler = an_not_handled; 400 for (i = 0; i < nitems(t4_fw_msg_handler); i++) 401 t4_fw_msg_handler[i] = fw_msg_not_handled; 402 for (i = 0; i < nitems(t4_cpl_handler); i++) 403 t4_cpl_handler[i] = cpl_not_handled; 404 405 t4_register_cpl_handler(CPL_FW4_MSG, handle_fw_msg); 406 t4_register_cpl_handler(CPL_FW6_MSG, handle_fw_msg); 407 t4_register_cpl_handler(CPL_SGE_EGR_UPDATE, handle_sge_egr_update); 408 t4_register_cpl_handler(CPL_RX_PKT, t4_eth_rx); 409 t4_register_fw_msg_handler(FW6_TYPE_CMD_RPL, t4_handle_fw_rpl); 410 t4_register_fw_msg_handler(FW6_TYPE_WRERR_RPL, t4_handle_wrerr_rpl); 411} 412 413void 414t4_sge_modunload(void) 415{ 416 417 counter_u64_free(extfree_refs); 418 counter_u64_free(extfree_rels); 419} 420 421uint64_t 422t4_sge_extfree_refs(void) 423{ 424 uint64_t refs, rels; 425 426 rels = counter_u64_fetch(extfree_rels); 427 refs = counter_u64_fetch(extfree_refs); 428 429 return (refs - rels); 430} 431 432static inline void 433setup_pad_and_pack_boundaries(struct adapter *sc) 434{ 435 uint32_t v, m; 436 int pad, pack; 437 438 pad = fl_pad; 439 if (fl_pad < 32 || fl_pad > 4096 || !powerof2(fl_pad)) { 440 /* 441 * If there is any chance that we might use buffer packing and 442 * the chip is a T4, then pick 64 as the pad/pack boundary. Set 443 * it to 32 in all other cases. 444 */ 445 pad = is_t4(sc) && buffer_packing ? 64 : 32; 446 447 /* 448 * For fl_pad = 0 we'll still write a reasonable value to the 449 * register but all the freelists will opt out of padding. 450 * We'll complain here only if the user tried to set it to a 451 * value greater than 0 that was invalid. 
452 */ 453 if (fl_pad > 0) { 454 device_printf(sc->dev, "Invalid hw.cxgbe.fl_pad value" 455 " (%d), using %d instead.\n", fl_pad, pad); 456 } 457 } 458 m = V_INGPADBOUNDARY(M_INGPADBOUNDARY); 459 v = V_INGPADBOUNDARY(ilog2(pad) - 5); 460 t4_set_reg_field(sc, A_SGE_CONTROL, m, v); 461 462 if (is_t4(sc)) { 463 if (fl_pack != -1 && fl_pack != pad) { 464 /* Complain but carry on. */ 465 device_printf(sc->dev, "hw.cxgbe.fl_pack (%d) ignored," 466 " using %d instead.\n", fl_pack, pad); 467 } 468 return; 469 } 470 471 pack = fl_pack; 472 if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 || 473 !powerof2(fl_pack)) { 474 pack = max(sc->params.pci.mps, CACHE_LINE_SIZE); 475 MPASS(powerof2(pack)); 476 if (pack < 16) 477 pack = 16; 478 if (pack == 32) 479 pack = 64; 480 if (pack > 4096) 481 pack = 4096; 482 if (fl_pack != -1) { 483 device_printf(sc->dev, "Invalid hw.cxgbe.fl_pack value" 484 " (%d), using %d instead.\n", fl_pack, pack); 485 } 486 } 487 m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY); 488 if (pack == 16) 489 v = V_INGPACKBOUNDARY(0); 490 else 491 v = V_INGPACKBOUNDARY(ilog2(pack) - 5); 492 493 MPASS(!is_t4(sc)); /* T4 doesn't have SGE_CONTROL2 */ 494 t4_set_reg_field(sc, A_SGE_CONTROL2, m, v); 495} 496 497/* 498 * adap->params.vpd.cclk must be set up before this is called. 499 */ 500void 501t4_tweak_chip_settings(struct adapter *sc) 502{ 503 int i; 504 uint32_t v, m; 505 int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200}; 506 int timer_max = M_TIMERVALUE0 * 1000 / sc->params.vpd.cclk; 507 int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */ 508 uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE); 509 static int sge_flbuf_sizes[] = { 510 MCLBYTES, 511#if MJUMPAGESIZE != MCLBYTES 512 MJUMPAGESIZE, 513 MJUMPAGESIZE - CL_METADATA_SIZE, 514 MJUMPAGESIZE - 2 * MSIZE - CL_METADATA_SIZE, 515#endif 516 MJUM9BYTES, 517 MJUM16BYTES, 518 MCLBYTES - MSIZE - CL_METADATA_SIZE, 519 MJUM9BYTES - CL_METADATA_SIZE, 520 MJUM16BYTES - CL_METADATA_SIZE, 521 }; 522 523 KASSERT(sc->flags & MASTER_PF, 524 ("%s: trying to change chip settings when not master.", __func__)); 525 526 m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE; 527 v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE | 528 V_EGRSTATUSPAGESIZE(spg_len == 128); 529 t4_set_reg_field(sc, A_SGE_CONTROL, m, v); 530 531 setup_pad_and_pack_boundaries(sc); 532 533 v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) | 534 V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) | 535 V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) | 536 V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) | 537 V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) | 538 V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) | 539 V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) | 540 V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10); 541 t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, v); 542 543 KASSERT(nitems(sge_flbuf_sizes) <= SGE_FLBUF_SIZES, 544 ("%s: hw buffer size table too big", __func__)); 545 for (i = 0; i < min(nitems(sge_flbuf_sizes), SGE_FLBUF_SIZES); i++) { 546 t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i), 547 sge_flbuf_sizes[i]); 548 } 549 550 v = V_THRESHOLD_0(intr_pktcount[0]) | V_THRESHOLD_1(intr_pktcount[1]) | 551 V_THRESHOLD_2(intr_pktcount[2]) | V_THRESHOLD_3(intr_pktcount[3]); 552 t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD, v); 553 554 KASSERT(intr_timer[0] <= timer_max, 555 ("%s: not a single usable timer (%d, %d)", __func__, intr_timer[0], 556 timer_max)); 557 for (i = 1; i < nitems(intr_timer); i++) { 558 KASSERT(intr_timer[i] >= intr_timer[i - 1], 559 ("%s: timers not listed in increasing order (%d)", 560 __func__, i)); 561 562 while (intr_timer[i] > 
timer_max) { 563 if (i == nitems(intr_timer) - 1) { 564 intr_timer[i] = timer_max; 565 break; 566 } 567 intr_timer[i] += intr_timer[i - 1]; 568 intr_timer[i] /= 2; 569 } 570 } 571 572 v = V_TIMERVALUE0(us_to_core_ticks(sc, intr_timer[0])) | 573 V_TIMERVALUE1(us_to_core_ticks(sc, intr_timer[1])); 574 t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1, v); 575 v = V_TIMERVALUE2(us_to_core_ticks(sc, intr_timer[2])) | 576 V_TIMERVALUE3(us_to_core_ticks(sc, intr_timer[3])); 577 t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3, v); 578 v = V_TIMERVALUE4(us_to_core_ticks(sc, intr_timer[4])) | 579 V_TIMERVALUE5(us_to_core_ticks(sc, intr_timer[5])); 580 t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5, v); 581 582 /* 4K, 16K, 64K, 256K DDP "page sizes" */ 583 v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6); 584 t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, v); 585 586 m = v = F_TDDPTAGTCB; 587 t4_set_reg_field(sc, A_ULP_RX_CTL, m, v); 588 589 m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET | 590 F_RESETDDPOFFSET; 591 v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET; 592 t4_set_reg_field(sc, A_TP_PARA_REG5, m, v); 593} 594 595/* 596 * SGE wants the buffer to be at least 64B and then a multiple of 16. If 597 * padding is in use the buffer's start and end need to be aligned to the pad 598 * boundary as well. We'll just make sure that the size is a multiple of the 599 * boundary here, it is up to the buffer allocation code to make sure the start 600 * of the buffer is aligned as well. 601 */ 602static inline int 603hwsz_ok(struct adapter *sc, int hwsz) 604{ 605 int mask = fl_pad ? sc->params.sge.pad_boundary - 1 : 16 - 1; 606 607 return (hwsz >= 64 && (hwsz & mask) == 0); 608} 609 610/* 611 * XXX: driver really should be able to deal with unexpected settings. 612 */ 613int 614t4_read_chip_settings(struct adapter *sc) 615{ 616 struct sge *s = &sc->sge; 617 struct sge_params *sp = &sc->params.sge; 618 int i, j, n, rc = 0; 619 uint32_t m, v, r; 620 uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE); 621 static int sw_buf_sizes[] = { /* Sorted by size */ 622 MCLBYTES, 623#if MJUMPAGESIZE != MCLBYTES 624 MJUMPAGESIZE, 625#endif 626 MJUM9BYTES, 627 MJUM16BYTES 628 }; 629 struct sw_zone_info *swz, *safe_swz; 630 struct hw_buf_info *hwb; 631 632 m = F_RXPKTCPLMODE; 633 v = F_RXPKTCPLMODE; 634 r = sc->params.sge.sge_control; 635 if ((r & m) != v) { 636 device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", r); 637 rc = EINVAL; 638 } 639 640 /* 641 * If this changes then every single use of PAGE_SHIFT in the driver 642 * needs to be carefully reviewed for PAGE_SHIFT vs sp->page_shift. 643 */ 644 if (sp->page_shift != PAGE_SHIFT) { 645 device_printf(sc->dev, "invalid SGE_HOST_PAGE_SIZE(0x%x)\n", r); 646 rc = EINVAL; 647 } 648 649 /* Filter out unusable hw buffer sizes entirely (mark with -2). */ 650 hwb = &s->hw_buf_info[0]; 651 for (i = 0; i < nitems(s->hw_buf_info); i++, hwb++) { 652 r = sc->params.sge.sge_fl_buffer_size[i]; 653 hwb->size = r; 654 hwb->zidx = hwsz_ok(sc, r) ? -1 : -2; 655 hwb->next = -1; 656 } 657 658 /* 659 * Create a sorted list in decreasing order of hw buffer sizes (and so 660 * increasing order of spare area) for each software zone. 661 * 662 * If padding is enabled then the start and end of the buffer must align 663 * to the pad boundary; if packing is enabled then they must align with 664 * the pack boundary as well. Allocations from the cluster zones are 665 * aligned to min(size, 4K), so the buffer starts at that alignment and 666 * ends at hwb->size alignment. 
If mbuf inlining is allowed the 667 * starting alignment will be reduced to MSIZE and the driver will 668 * exercise appropriate caution when deciding on the best buffer layout 669 * to use. 670 */ 671 n = 0; /* no usable buffer size to begin with */ 672 swz = &s->sw_zone_info[0]; 673 safe_swz = NULL; 674 for (i = 0; i < SW_ZONE_SIZES; i++, swz++) { 675 int8_t head = -1, tail = -1; 676 677 swz->size = sw_buf_sizes[i]; 678 swz->zone = m_getzone(swz->size); 679 swz->type = m_gettype(swz->size); 680 681 if (swz->size < PAGE_SIZE) { 682 MPASS(powerof2(swz->size)); 683 if (fl_pad && (swz->size % sp->pad_boundary != 0)) 684 continue; 685 } 686 687 if (swz->size == safest_rx_cluster) 688 safe_swz = swz; 689 690 hwb = &s->hw_buf_info[0]; 691 for (j = 0; j < SGE_FLBUF_SIZES; j++, hwb++) { 692 if (hwb->zidx != -1 || hwb->size > swz->size) 693 continue; 694#ifdef INVARIANTS 695 if (fl_pad) 696 MPASS(hwb->size % sp->pad_boundary == 0); 697#endif 698 hwb->zidx = i; 699 if (head == -1) 700 head = tail = j; 701 else if (hwb->size < s->hw_buf_info[tail].size) { 702 s->hw_buf_info[tail].next = j; 703 tail = j; 704 } else { 705 int8_t *cur; 706 struct hw_buf_info *t; 707 708 for (cur = &head; *cur != -1; cur = &t->next) { 709 t = &s->hw_buf_info[*cur]; 710 if (hwb->size == t->size) { 711 hwb->zidx = -2; 712 break; 713 } 714 if (hwb->size > t->size) { 715 hwb->next = *cur; 716 *cur = j; 717 break; 718 } 719 } 720 } 721 } 722 swz->head_hwidx = head; 723 swz->tail_hwidx = tail; 724 725 if (tail != -1) { 726 n++; 727 if (swz->size - s->hw_buf_info[tail].size >= 728 CL_METADATA_SIZE) 729 sc->flags |= BUF_PACKING_OK; 730 } 731 } 732 if (n == 0) { 733 device_printf(sc->dev, "no usable SGE FL buffer size.\n"); 734 rc = EINVAL; 735 } 736 737 s->safe_hwidx1 = -1; 738 s->safe_hwidx2 = -1; 739 if (safe_swz != NULL) { 740 s->safe_hwidx1 = safe_swz->head_hwidx; 741 for (i = safe_swz->head_hwidx; i != -1; i = hwb->next) { 742 int spare; 743 744 hwb = &s->hw_buf_info[i]; 745#ifdef INVARIANTS 746 if (fl_pad) 747 MPASS(hwb->size % sp->pad_boundary == 0); 748#endif 749 spare = safe_swz->size - hwb->size; 750 if (spare >= CL_METADATA_SIZE) { 751 s->safe_hwidx2 = i; 752 break; 753 } 754 } 755 } 756 757 if (sc->flags & IS_VF) 758 return (0); 759 760 v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6); 761 r = t4_read_reg(sc, A_ULP_RX_TDDP_PSZ); 762 if (r != v) { 763 device_printf(sc->dev, "invalid ULP_RX_TDDP_PSZ(0x%x)\n", r); 764 rc = EINVAL; 765 } 766 767 m = v = F_TDDPTAGTCB; 768 r = t4_read_reg(sc, A_ULP_RX_CTL); 769 if ((r & m) != v) { 770 device_printf(sc->dev, "invalid ULP_RX_CTL(0x%x)\n", r); 771 rc = EINVAL; 772 } 773 774 m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET | 775 F_RESETDDPOFFSET; 776 v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET; 777 r = t4_read_reg(sc, A_TP_PARA_REG5); 778 if ((r & m) != v) { 779 device_printf(sc->dev, "invalid TP_PARA_REG5(0x%x)\n", r); 780 rc = EINVAL; 781 } 782 783 t4_init_tp_params(sc); 784 785 t4_read_mtu_tbl(sc, sc->params.mtus, NULL); 786 t4_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd); 787 788 return (rc); 789} 790 791int 792t4_create_dma_tag(struct adapter *sc) 793{ 794 int rc; 795 796 rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0, 797 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE, 798 BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, 799 NULL, &sc->dmat); 800 if (rc != 0) { 801 device_printf(sc->dev, 802 "failed to create main DMA tag: %d\n", rc); 803 } 804 805 return (rc); 806} 807 
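/*
 * Example (illustrative only; not part of the original sources): the
 * hw.cxgbe.* knobs declared near the top of this file are loader tunables,
 * so they are normally set in /boot/loader.conf before the module loads,
 * e.g.:
 *
 *   hw.cxgbe.fl_pktshift="2"
 *   hw.cxgbe.buffer_packing="1"
 *   hw.cxgbe.fl_pack="64"
 *
 * t4_sge_modload() and setup_pad_and_pack_boundaries() above replace invalid
 * values with sane defaults, and t4_sge_sysctls() below re-exports the values
 * actually in effect as read-only sysctls on the adapter device.
 */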
808void 809t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, 810 struct sysctl_oid_list *children) 811{ 812 struct sge_params *sp = &sc->params.sge; 813 814 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "buffer_sizes", 815 CTLTYPE_STRING | CTLFLAG_RD, &sc->sge, 0, sysctl_bufsizes, "A", 816 "freelist buffer sizes"); 817 818 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pktshift", CTLFLAG_RD, 819 NULL, sp->fl_pktshift, "payload DMA offset in rx buffer (bytes)"); 820 821 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pad", CTLFLAG_RD, 822 NULL, sp->pad_boundary, "payload pad boundary (bytes)"); 823 824 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "spg_len", CTLFLAG_RD, 825 NULL, sp->spg_len, "status page size (bytes)"); 826 827 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_drop", CTLFLAG_RD, 828 NULL, cong_drop, "congestion drop setting"); 829 830 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pack", CTLFLAG_RD, 831 NULL, sp->pack_boundary, "payload pack boundary (bytes)"); 832} 833 834int 835t4_destroy_dma_tag(struct adapter *sc) 836{ 837 if (sc->dmat) 838 bus_dma_tag_destroy(sc->dmat); 839 840 return (0); 841} 842 843/* 844 * Allocate and initialize the firmware event queue and the management queue. 845 * 846 * Returns errno on failure. Resources allocated up to that point may still be 847 * allocated. Caller is responsible for cleanup in case this function fails. 848 */ 849int 850t4_setup_adapter_queues(struct adapter *sc) 851{ 852 int rc; 853 854 ADAPTER_LOCK_ASSERT_NOTOWNED(sc); 855 856 sysctl_ctx_init(&sc->ctx); 857 sc->flags |= ADAP_SYSCTL_CTX; 858 859 /* 860 * Firmware event queue 861 */ 862 rc = alloc_fwq(sc); 863 if (rc != 0) 864 return (rc); 865 866 /* 867 * Management queue. This is just a control queue that uses the fwq as 868 * its associated iq. 869 */ 870 if (!(sc->flags & IS_VF)) 871 rc = alloc_mgmtq(sc); 872 873 return (rc); 874} 875 876/* 877 * Idempotent 878 */ 879int 880t4_teardown_adapter_queues(struct adapter *sc) 881{ 882 883 ADAPTER_LOCK_ASSERT_NOTOWNED(sc); 884 885 /* Do this before freeing the queue */ 886 if (sc->flags & ADAP_SYSCTL_CTX) { 887 sysctl_ctx_free(&sc->ctx); 888 sc->flags &= ~ADAP_SYSCTL_CTX; 889 } 890 891 free_mgmtq(sc); 892 free_fwq(sc); 893 894 return (0); 895} 896 897static inline int 898first_vector(struct vi_info *vi) 899{ 900 struct adapter *sc = vi->pi->adapter; 901 902 if (sc->intr_count == 1) 903 return (0); 904 905 return (vi->first_intr); 906} 907 908/* 909 * Given an arbitrary "index," come up with an iq that can be used by other 910 * queues (of this VI) for interrupt forwarding, SGE egress updates, etc. 911 * The iq returned is guaranteed to be something that takes direct interrupts. 
912 */ 913static struct sge_iq * 914vi_intr_iq(struct vi_info *vi, int idx) 915{ 916 struct adapter *sc = vi->pi->adapter; 917 struct sge *s = &sc->sge; 918 struct sge_iq *iq = NULL; 919 int nintr, i; 920 921 if (sc->intr_count == 1) 922 return (&sc->sge.fwq); 923 924 nintr = vi->nintr; 925 KASSERT(nintr != 0, 926 ("%s: vi %p has no exclusive interrupts, total interrupts = %d", 927 __func__, vi, sc->intr_count)); 928 i = idx % nintr; 929 930 if (vi->flags & INTR_RXQ) { 931 if (i < vi->nrxq) { 932 iq = &s->rxq[vi->first_rxq + i].iq; 933 goto done; 934 } 935 i -= vi->nrxq; 936 } 937#ifdef TCP_OFFLOAD 938 if (vi->flags & INTR_OFLD_RXQ) { 939 if (i < vi->nofldrxq) { 940 iq = &s->ofld_rxq[vi->first_ofld_rxq + i].iq; 941 goto done; 942 } 943 i -= vi->nofldrxq; 944 } 945#endif 946 panic("%s: vi %p, intr_flags 0x%lx, idx %d, total intr %d\n", __func__, 947 vi, vi->flags & INTR_ALL, idx, nintr); 948done: 949 MPASS(iq != NULL); 950 KASSERT(iq->flags & IQ_INTR, 951 ("%s: iq %p (vi %p, intr_flags 0x%lx, idx %d)", __func__, iq, vi, 952 vi->flags & INTR_ALL, idx)); 953 return (iq); 954} 955 956/* Maximum payload that can be delivered with a single iq descriptor */ 957static inline int 958mtu_to_max_payload(struct adapter *sc, int mtu, const int toe) 959{ 960 int payload; 961 962#ifdef TCP_OFFLOAD 963 if (toe) { 964 payload = sc->tt.rx_coalesce ? 965 G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2)) : mtu; 966 } else { 967#endif 968 /* large enough even when hw VLAN extraction is disabled */ 969 payload = sc->params.sge.fl_pktshift + ETHER_HDR_LEN + 970 ETHER_VLAN_ENCAP_LEN + mtu; 971#ifdef TCP_OFFLOAD 972 } 973#endif 974 975 return (payload); 976} 977 978int 979t4_setup_vi_queues(struct vi_info *vi) 980{ 981 int rc = 0, i, j, intr_idx, iqid; 982 struct sge_rxq *rxq; 983 struct sge_txq *txq; 984 struct sge_wrq *ctrlq; 985#ifdef TCP_OFFLOAD 986 struct sge_ofld_rxq *ofld_rxq; 987 struct sge_wrq *ofld_txq; 988#endif 989#ifdef DEV_NETMAP 990 int saved_idx; 991 struct sge_nm_rxq *nm_rxq; 992 struct sge_nm_txq *nm_txq; 993#endif 994 char name[16]; 995 struct port_info *pi = vi->pi; 996 struct adapter *sc = pi->adapter; 997 struct ifnet *ifp = vi->ifp; 998 struct sysctl_oid *oid = device_get_sysctl_tree(vi->dev); 999 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 1000 int maxp, mtu = ifp->if_mtu; 1001 1002 /* Interrupt vector to start from (when using multiple vectors) */ 1003 intr_idx = first_vector(vi); 1004 1005#ifdef DEV_NETMAP 1006 saved_idx = intr_idx; 1007 if (ifp->if_capabilities & IFCAP_NETMAP) { 1008 1009 /* netmap is supported with direct interrupts only. */ 1010 MPASS(vi->flags & INTR_RXQ); 1011 1012 /* 1013 * We don't have buffers to back the netmap rx queues 1014 * right now so we create the queues in a way that 1015 * doesn't set off any congestion signal in the chip. 1016 */ 1017 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "nm_rxq", 1018 CTLFLAG_RD, NULL, "rx queues"); 1019 for_each_nm_rxq(vi, i, nm_rxq) { 1020 rc = alloc_nm_rxq(vi, nm_rxq, intr_idx, i, oid); 1021 if (rc != 0) 1022 goto done; 1023 intr_idx++; 1024 } 1025 1026 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "nm_txq", 1027 CTLFLAG_RD, NULL, "tx queues"); 1028 for_each_nm_txq(vi, i, nm_txq) { 1029 iqid = vi->first_nm_rxq + (i % vi->nnmrxq); 1030 rc = alloc_nm_txq(vi, nm_txq, iqid, i, oid); 1031 if (rc != 0) 1032 goto done; 1033 } 1034 } 1035 1036 /* Normal rx queues and netmap rx queues share the same interrupts. 
*/ 1037 intr_idx = saved_idx; 1038#endif 1039 1040 /* 1041 * First pass over all NIC and TOE rx queues: 1042 * a) initialize iq and fl 1043 * b) allocate queue iff it will take direct interrupts. 1044 */ 1045 maxp = mtu_to_max_payload(sc, mtu, 0); 1046 if (vi->flags & INTR_RXQ) { 1047 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "rxq", 1048 CTLFLAG_RD, NULL, "rx queues"); 1049 } 1050 for_each_rxq(vi, i, rxq) { 1051 1052 init_iq(&rxq->iq, sc, vi->tmr_idx, vi->pktc_idx, vi->qsize_rxq); 1053 1054 snprintf(name, sizeof(name), "%s rxq%d-fl", 1055 device_get_nameunit(vi->dev), i); 1056 init_fl(sc, &rxq->fl, vi->qsize_rxq / 8, maxp, name); 1057 1058 if (vi->flags & INTR_RXQ) { 1059 rxq->iq.flags |= IQ_INTR; 1060 rc = alloc_rxq(vi, rxq, intr_idx, i, oid); 1061 if (rc != 0) 1062 goto done; 1063 intr_idx++; 1064 } 1065 } 1066#ifdef DEV_NETMAP 1067 if (ifp->if_capabilities & IFCAP_NETMAP) 1068 intr_idx = saved_idx + max(vi->nrxq, vi->nnmrxq); 1069#endif 1070#ifdef TCP_OFFLOAD 1071 maxp = mtu_to_max_payload(sc, mtu, 1); 1072 if (vi->flags & INTR_OFLD_RXQ) { 1073 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_rxq", 1074 CTLFLAG_RD, NULL, 1075 "rx queues for offloaded TCP connections"); 1076 } 1077 for_each_ofld_rxq(vi, i, ofld_rxq) { 1078 1079 init_iq(&ofld_rxq->iq, sc, vi->tmr_idx, vi->pktc_idx, 1080 vi->qsize_rxq); 1081 1082 snprintf(name, sizeof(name), "%s ofld_rxq%d-fl", 1083 device_get_nameunit(vi->dev), i); 1084 init_fl(sc, &ofld_rxq->fl, vi->qsize_rxq / 8, maxp, name); 1085 1086 if (vi->flags & INTR_OFLD_RXQ) { 1087 ofld_rxq->iq.flags |= IQ_INTR; 1088 rc = alloc_ofld_rxq(vi, ofld_rxq, intr_idx, i, oid); 1089 if (rc != 0) 1090 goto done; 1091 intr_idx++; 1092 } 1093 } 1094#endif 1095 1096 /* 1097 * Second pass over all NIC and TOE rx queues. The queues forwarding 1098 * their interrupts are allocated now. 1099 */ 1100 j = 0; 1101 if (!(vi->flags & INTR_RXQ)) { 1102 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "rxq", 1103 CTLFLAG_RD, NULL, "rx queues"); 1104 for_each_rxq(vi, i, rxq) { 1105 MPASS(!(rxq->iq.flags & IQ_INTR)); 1106 1107 intr_idx = vi_intr_iq(vi, j)->abs_id; 1108 1109 rc = alloc_rxq(vi, rxq, intr_idx, i, oid); 1110 if (rc != 0) 1111 goto done; 1112 j++; 1113 } 1114 } 1115#ifdef TCP_OFFLOAD 1116 if (vi->nofldrxq != 0 && !(vi->flags & INTR_OFLD_RXQ)) { 1117 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_rxq", 1118 CTLFLAG_RD, NULL, 1119 "rx queues for offloaded TCP connections"); 1120 for_each_ofld_rxq(vi, i, ofld_rxq) { 1121 MPASS(!(ofld_rxq->iq.flags & IQ_INTR)); 1122 1123 intr_idx = vi_intr_iq(vi, j)->abs_id; 1124 1125 rc = alloc_ofld_rxq(vi, ofld_rxq, intr_idx, i, oid); 1126 if (rc != 0) 1127 goto done; 1128 j++; 1129 } 1130 } 1131#endif 1132 1133 /* 1134 * Now the tx queues. Only one pass needed. 
1135 */ 1136 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "txq", CTLFLAG_RD, 1137 NULL, "tx queues"); 1138 j = 0; 1139 for_each_txq(vi, i, txq) { 1140 iqid = vi_intr_iq(vi, j)->cntxt_id; 1141 snprintf(name, sizeof(name), "%s txq%d", 1142 device_get_nameunit(vi->dev), i); 1143 init_eq(sc, &txq->eq, EQ_ETH, vi->qsize_txq, pi->tx_chan, iqid, 1144 name); 1145 1146 rc = alloc_txq(vi, txq, i, oid); 1147 if (rc != 0) 1148 goto done; 1149 j++; 1150 } 1151#ifdef TCP_OFFLOAD 1152 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_txq", 1153 CTLFLAG_RD, NULL, "tx queues for offloaded TCP connections"); 1154 for_each_ofld_txq(vi, i, ofld_txq) { 1155 struct sysctl_oid *oid2; 1156 1157 iqid = vi_intr_iq(vi, j)->cntxt_id; 1158 snprintf(name, sizeof(name), "%s ofld_txq%d", 1159 device_get_nameunit(vi->dev), i); 1160 init_eq(sc, &ofld_txq->eq, EQ_OFLD, vi->qsize_txq, pi->tx_chan, 1161 iqid, name); 1162 1163 snprintf(name, sizeof(name), "%d", i); 1164 oid2 = SYSCTL_ADD_NODE(&vi->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, 1165 name, CTLFLAG_RD, NULL, "offload tx queue"); 1166 1167 rc = alloc_wrq(sc, vi, ofld_txq, oid2); 1168 if (rc != 0) 1169 goto done; 1170 j++; 1171 } 1172#endif 1173 1174 /* 1175 * Finally, the control queue. 1176 */ 1177 if (!IS_MAIN_VI(vi) || sc->flags & IS_VF) 1178 goto done; 1179 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ctrlq", CTLFLAG_RD, 1180 NULL, "ctrl queue"); 1181 ctrlq = &sc->sge.ctrlq[pi->port_id]; 1182 iqid = vi_intr_iq(vi, 0)->cntxt_id; 1183 snprintf(name, sizeof(name), "%s ctrlq", device_get_nameunit(vi->dev)); 1184 init_eq(sc, &ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, pi->tx_chan, iqid, 1185 name); 1186 rc = alloc_wrq(sc, vi, ctrlq, oid); 1187 1188done: 1189 if (rc) 1190 t4_teardown_vi_queues(vi); 1191 1192 return (rc); 1193} 1194 1195/* 1196 * Idempotent 1197 */ 1198int 1199t4_teardown_vi_queues(struct vi_info *vi) 1200{ 1201 int i; 1202 struct port_info *pi = vi->pi; 1203 struct adapter *sc = pi->adapter; 1204 struct sge_rxq *rxq; 1205 struct sge_txq *txq; 1206#ifdef TCP_OFFLOAD 1207 struct sge_ofld_rxq *ofld_rxq; 1208 struct sge_wrq *ofld_txq; 1209#endif 1210#ifdef DEV_NETMAP 1211 struct sge_nm_rxq *nm_rxq; 1212 struct sge_nm_txq *nm_txq; 1213#endif 1214 1215 /* Do this before freeing the queues */ 1216 if (vi->flags & VI_SYSCTL_CTX) { 1217 sysctl_ctx_free(&vi->ctx); 1218 vi->flags &= ~VI_SYSCTL_CTX; 1219 } 1220 1221#ifdef DEV_NETMAP 1222 if (vi->ifp->if_capabilities & IFCAP_NETMAP) { 1223 for_each_nm_txq(vi, i, nm_txq) { 1224 free_nm_txq(vi, nm_txq); 1225 } 1226 1227 for_each_nm_rxq(vi, i, nm_rxq) { 1228 free_nm_rxq(vi, nm_rxq); 1229 } 1230 } 1231#endif 1232 1233 /* 1234 * Take down all the tx queues first, as they reference the rx queues 1235 * (for egress updates, etc.). 1236 */ 1237 1238 if (IS_MAIN_VI(vi) && !(sc->flags & IS_VF)) 1239 free_wrq(sc, &sc->sge.ctrlq[pi->port_id]); 1240 1241 for_each_txq(vi, i, txq) { 1242 free_txq(vi, txq); 1243 } 1244#ifdef TCP_OFFLOAD 1245 for_each_ofld_txq(vi, i, ofld_txq) { 1246 free_wrq(sc, ofld_txq); 1247 } 1248#endif 1249 1250 /* 1251 * Then take down the rx queues that forward their interrupts, as they 1252 * reference other rx queues. 1253 */ 1254 1255 for_each_rxq(vi, i, rxq) { 1256 if ((rxq->iq.flags & IQ_INTR) == 0) 1257 free_rxq(vi, rxq); 1258 } 1259#ifdef TCP_OFFLOAD 1260 for_each_ofld_rxq(vi, i, ofld_rxq) { 1261 if ((ofld_rxq->iq.flags & IQ_INTR) == 0) 1262 free_ofld_rxq(vi, ofld_rxq); 1263 } 1264#endif 1265 1266 /* 1267 * Then take down the rx queues that take direct interrupts. 
1268 */ 1269 1270 for_each_rxq(vi, i, rxq) { 1271 if (rxq->iq.flags & IQ_INTR) 1272 free_rxq(vi, rxq); 1273 } 1274#ifdef TCP_OFFLOAD 1275 for_each_ofld_rxq(vi, i, ofld_rxq) { 1276 if (ofld_rxq->iq.flags & IQ_INTR) 1277 free_ofld_rxq(vi, ofld_rxq); 1278 } 1279#endif 1280 1281 return (0); 1282} 1283 1284/* 1285 * Deals with errors and the firmware event queue. All data rx queues forward 1286 * their interrupt to the firmware event queue. 1287 */ 1288void 1289t4_intr_all(void *arg) 1290{ 1291 struct adapter *sc = arg; 1292 struct sge_iq *fwq = &sc->sge.fwq; 1293 1294 t4_intr_err(arg); 1295 if (atomic_cmpset_int(&fwq->state, IQS_IDLE, IQS_BUSY)) { 1296 service_iq(fwq, 0); 1297 atomic_cmpset_int(&fwq->state, IQS_BUSY, IQS_IDLE); 1298 } 1299} 1300 1301/* Deals with error interrupts */ 1302void 1303t4_intr_err(void *arg) 1304{ 1305 struct adapter *sc = arg; 1306 1307 t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0); 1308 t4_slow_intr_handler(sc); 1309} 1310 1311void 1312t4_intr_evt(void *arg) 1313{ 1314 struct sge_iq *iq = arg; 1315 1316 if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) { 1317 service_iq(iq, 0); 1318 atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE); 1319 } 1320} 1321 1322void 1323t4_intr(void *arg) 1324{ 1325 struct sge_iq *iq = arg; 1326 1327 if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) { 1328 service_iq(iq, 0); 1329 atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE); 1330 } 1331} 1332 1333void 1334t4_vi_intr(void *arg) 1335{ 1336 struct irq *irq = arg; 1337 1338#ifdef DEV_NETMAP 1339 if (atomic_cmpset_int(&irq->nm_state, NM_ON, NM_BUSY)) { 1340 t4_nm_intr(irq->nm_rxq); 1341 atomic_cmpset_int(&irq->nm_state, NM_BUSY, NM_ON); 1342 } 1343#endif 1344 if (irq->rxq != NULL) 1345 t4_intr(irq->rxq); 1346} 1347 1348/* 1349 * Deals with anything and everything on the given ingress queue. 1350 */ 1351static int 1352service_iq(struct sge_iq *iq, int budget) 1353{ 1354 struct sge_iq *q; 1355 struct sge_rxq *rxq = iq_to_rxq(iq); /* Use iff iq is part of rxq */ 1356 struct sge_fl *fl; /* Use iff IQ_HAS_FL */ 1357 struct adapter *sc = iq->adapter; 1358 struct iq_desc *d = &iq->desc[iq->cidx]; 1359 int ndescs = 0, limit; 1360 int rsp_type, refill; 1361 uint32_t lq; 1362 uint16_t fl_hw_cidx; 1363 struct mbuf *m0; 1364 STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql); 1365#if defined(INET) || defined(INET6) 1366 const struct timeval lro_timeout = {0, sc->lro_timeout}; 1367#endif 1368 1369 KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq)); 1370 1371 limit = budget ? budget : iq->qsize / 16; 1372 1373 if (iq->flags & IQ_HAS_FL) { 1374 fl = &rxq->fl; 1375 fl_hw_cidx = fl->hw_cidx; /* stable snapshot */ 1376 } else { 1377 fl = NULL; 1378 fl_hw_cidx = 0; /* to silence gcc warning */ 1379 } 1380 1381 /* 1382 * We always come back and check the descriptor ring for new indirect 1383 * interrupts and other responses after running a single handler. 
1384 */ 1385 for (;;) { 1386 while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) { 1387 1388 rmb(); 1389 1390 refill = 0; 1391 m0 = NULL; 1392 rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen); 1393 lq = be32toh(d->rsp.pldbuflen_qid); 1394 1395 switch (rsp_type) { 1396 case X_RSPD_TYPE_FLBUF: 1397 1398 KASSERT(iq->flags & IQ_HAS_FL, 1399 ("%s: data for an iq (%p) with no freelist", 1400 __func__, iq)); 1401 1402 m0 = get_fl_payload(sc, fl, lq); 1403 if (__predict_false(m0 == NULL)) 1404 goto process_iql; 1405 refill = IDXDIFF(fl->hw_cidx, fl_hw_cidx, fl->sidx) > 2; 1406#ifdef T4_PKT_TIMESTAMP 1407 /* 1408 * 60 bit timestamp for the payload is 1409 * *(uint64_t *)m0->m_pktdat. Note that it is 1410 * in the leading free-space in the mbuf. The 1411 * kernel can clobber it during a pullup, 1412 * m_copymdata, etc. You need to make sure that 1413 * the mbuf reaches you unmolested if you care 1414 * about the timestamp. 1415 */ 1416 *(uint64_t *)m0->m_pktdat = 1417 be64toh(ctrl->u.last_flit) & 1418 0xfffffffffffffff; 1419#endif 1420 1421 /* fall through */ 1422 1423 case X_RSPD_TYPE_CPL: 1424 KASSERT(d->rss.opcode < NUM_CPL_CMDS, 1425 ("%s: bad opcode %02x.", __func__, 1426 d->rss.opcode)); 1427 t4_cpl_handler[d->rss.opcode](iq, &d->rss, m0); 1428 break; 1429 1430 case X_RSPD_TYPE_INTR: 1431 1432 /* 1433 * Interrupts should be forwarded only to queues 1434 * that are not forwarding their interrupts. 1435 * This means service_iq can recurse but only 1 1436 * level deep. 1437 */ 1438 KASSERT(budget == 0, 1439 ("%s: budget %u, rsp_type %u", __func__, 1440 budget, rsp_type)); 1441 1442 /* 1443 * There are 1K interrupt-capable queues (qids 0 1444 * through 1023). A response type indicating a 1445 * forwarded interrupt with a qid >= 1K is an 1446 * iWARP async notification. 1447 */ 1448 if (lq >= 1024) { 1449 t4_an_handler(iq, &d->rsp); 1450 break; 1451 } 1452 1453 q = sc->sge.iqmap[lq - sc->sge.iq_start - 1454 sc->sge.iq_base]; 1455 if (atomic_cmpset_int(&q->state, IQS_IDLE, 1456 IQS_BUSY)) { 1457 if (service_iq(q, q->qsize / 16) == 0) { 1458 atomic_cmpset_int(&q->state, 1459 IQS_BUSY, IQS_IDLE); 1460 } else { 1461 STAILQ_INSERT_TAIL(&iql, q, 1462 link); 1463 } 1464 } 1465 break; 1466 1467 default: 1468 KASSERT(0, 1469 ("%s: illegal response type %d on iq %p", 1470 __func__, rsp_type, iq)); 1471 log(LOG_ERR, 1472 "%s: illegal response type %d on iq %p", 1473 device_get_nameunit(sc->dev), rsp_type, iq); 1474 break; 1475 } 1476 1477 d++; 1478 if (__predict_false(++iq->cidx == iq->sidx)) { 1479 iq->cidx = 0; 1480 iq->gen ^= F_RSPD_GEN; 1481 d = &iq->desc[0]; 1482 } 1483 if (__predict_false(++ndescs == limit)) { 1484 t4_write_reg(sc, sc->sge_gts_reg, 1485 V_CIDXINC(ndescs) | 1486 V_INGRESSQID(iq->cntxt_id) | 1487 V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); 1488 ndescs = 0; 1489 1490#if defined(INET) || defined(INET6) 1491 if (iq->flags & IQ_LRO_ENABLED && 1492 sc->lro_timeout != 0) { 1493 tcp_lro_flush_inactive(&rxq->lro, 1494 &lro_timeout); 1495 } 1496#endif 1497 1498 if (budget) { 1499 if (iq->flags & IQ_HAS_FL) { 1500 FL_LOCK(fl); 1501 refill_fl(sc, fl, 32); 1502 FL_UNLOCK(fl); 1503 } 1504 return (EINPROGRESS); 1505 } 1506 } 1507 if (refill) { 1508 FL_LOCK(fl); 1509 refill_fl(sc, fl, 32); 1510 FL_UNLOCK(fl); 1511 fl_hw_cidx = fl->hw_cidx; 1512 } 1513 } 1514 1515process_iql: 1516 if (STAILQ_EMPTY(&iql)) 1517 break; 1518 1519 /* 1520 * Process the head only, and send it to the back of the list if 1521 * it's still not done. 
1522 */ 1523 q = STAILQ_FIRST(&iql); 1524 STAILQ_REMOVE_HEAD(&iql, link); 1525 if (service_iq(q, q->qsize / 8) == 0) 1526 atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE); 1527 else 1528 STAILQ_INSERT_TAIL(&iql, q, link); 1529 } 1530 1531#if defined(INET) || defined(INET6) 1532 if (iq->flags & IQ_LRO_ENABLED) { 1533 struct lro_ctrl *lro = &rxq->lro; 1534 1535 tcp_lro_flush_all(lro); 1536 } 1537#endif 1538 1539 t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) | 1540 V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params)); 1541 1542 if (iq->flags & IQ_HAS_FL) { 1543 int starved; 1544 1545 FL_LOCK(fl); 1546 starved = refill_fl(sc, fl, 64); 1547 FL_UNLOCK(fl); 1548 if (__predict_false(starved != 0)) 1549 add_fl_to_sfl(sc, fl); 1550 } 1551 1552 return (0); 1553} 1554 1555static inline int 1556cl_has_metadata(struct sge_fl *fl, struct cluster_layout *cll) 1557{ 1558 int rc = fl->flags & FL_BUF_PACKING || cll->region1 > 0; 1559 1560 if (rc) 1561 MPASS(cll->region3 >= CL_METADATA_SIZE); 1562 1563 return (rc); 1564} 1565 1566static inline struct cluster_metadata * 1567cl_metadata(struct adapter *sc, struct sge_fl *fl, struct cluster_layout *cll, 1568 caddr_t cl) 1569{ 1570 1571 if (cl_has_metadata(fl, cll)) { 1572 struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx]; 1573 1574 return ((struct cluster_metadata *)(cl + swz->size) - 1); 1575 } 1576 return (NULL); 1577} 1578 1579static void 1580rxb_free(struct mbuf *m, void *arg1, void *arg2) 1581{ 1582 uma_zone_t zone = arg1; 1583 caddr_t cl = arg2; 1584 1585 uma_zfree(zone, cl); 1586 counter_u64_add(extfree_rels, 1); 1587} 1588 1589/* 1590 * The mbuf returned by this function could be allocated from zone_mbuf or 1591 * constructed in spare room in the cluster. 1592 * 1593 * The mbuf carries the payload in one of these ways 1594 * a) frame inside the mbuf (mbuf from zone_mbuf) 1595 * b) m_cljset (for clusters without metadata) zone_mbuf 1596 * c) m_extaddref (cluster with metadata) inline mbuf 1597 * d) m_extaddref (cluster with metadata) zone_mbuf 1598 */ 1599static struct mbuf * 1600get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int fr_offset, 1601 int remaining) 1602{ 1603 struct mbuf *m; 1604 struct fl_sdesc *sd = &fl->sdesc[fl->cidx]; 1605 struct cluster_layout *cll = &sd->cll; 1606 struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx]; 1607 struct hw_buf_info *hwb = &sc->sge.hw_buf_info[cll->hwidx]; 1608 struct cluster_metadata *clm = cl_metadata(sc, fl, cll, sd->cl); 1609 int len, blen; 1610 caddr_t payload; 1611 1612 blen = hwb->size - fl->rx_offset; /* max possible in this buf */ 1613 len = min(remaining, blen); 1614 payload = sd->cl + cll->region1 + fl->rx_offset; 1615 if (fl->flags & FL_BUF_PACKING) { 1616 const u_int l = fr_offset + len; 1617 const u_int pad = roundup2(l, fl->buf_boundary) - l; 1618 1619 if (fl->rx_offset + len + pad < hwb->size) 1620 blen = len + pad; 1621 MPASS(fl->rx_offset + blen <= hwb->size); 1622 } else { 1623 MPASS(fl->rx_offset == 0); /* not packing */ 1624 } 1625 1626 1627 if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) { 1628 1629 /* 1630 * Copy payload into a freshly allocated mbuf. 1631 */ 1632 1633 m = fr_offset == 0 ? 
1634 m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA); 1635 if (m == NULL) 1636 return (NULL); 1637 fl->mbuf_allocated++; 1638#ifdef T4_PKT_TIMESTAMP 1639 /* Leave room for a timestamp */ 1640 m->m_data += 8; 1641#endif 1642 /* copy data to mbuf */ 1643 bcopy(payload, mtod(m, caddr_t), len); 1644 1645 } else if (sd->nmbuf * MSIZE < cll->region1) { 1646 1647 /* 1648 * There's spare room in the cluster for an mbuf. Create one 1649 * and associate it with the payload that's in the cluster. 1650 */ 1651 1652 MPASS(clm != NULL); 1653 m = (struct mbuf *)(sd->cl + sd->nmbuf * MSIZE); 1654 /* No bzero required */ 1655 if (m_init(m, M_NOWAIT, MT_DATA, 1656 fr_offset == 0 ? M_PKTHDR | M_NOFREE : M_NOFREE)) 1657 return (NULL); 1658 fl->mbuf_inlined++; 1659 m_extaddref(m, payload, blen, &clm->refcount, rxb_free, 1660 swz->zone, sd->cl); 1661 if (sd->nmbuf++ == 0) 1662 counter_u64_add(extfree_refs, 1); 1663 1664 } else { 1665 1666 /* 1667 * Grab an mbuf from zone_mbuf and associate it with the 1668 * payload in the cluster. 1669 */ 1670 1671 m = fr_offset == 0 ? 1672 m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA); 1673 if (m == NULL) 1674 return (NULL); 1675 fl->mbuf_allocated++; 1676 if (clm != NULL) { 1677 m_extaddref(m, payload, blen, &clm->refcount, 1678 rxb_free, swz->zone, sd->cl); 1679 if (sd->nmbuf++ == 0) 1680 counter_u64_add(extfree_refs, 1); 1681 } else { 1682 m_cljset(m, sd->cl, swz->type); 1683 sd->cl = NULL; /* consumed, not a recycle candidate */ 1684 } 1685 } 1686 if (fr_offset == 0) 1687 m->m_pkthdr.len = remaining; 1688 m->m_len = len; 1689 1690 if (fl->flags & FL_BUF_PACKING) { 1691 fl->rx_offset += blen; 1692 MPASS(fl->rx_offset <= hwb->size); 1693 if (fl->rx_offset < hwb->size) 1694 return (m); /* without advancing the cidx */ 1695 } 1696 1697 if (__predict_false(++fl->cidx % 8 == 0)) { 1698 uint16_t cidx = fl->cidx / 8; 1699 1700 if (__predict_false(cidx == fl->sidx)) 1701 fl->cidx = cidx = 0; 1702 fl->hw_cidx = cidx; 1703 } 1704 fl->rx_offset = 0; 1705 1706 return (m); 1707} 1708 1709static struct mbuf * 1710get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf) 1711{ 1712 struct mbuf *m0, *m, **pnext; 1713 u_int remaining; 1714 const u_int total = G_RSPD_LEN(len_newbuf); 1715 1716 if (__predict_false(fl->flags & FL_BUF_RESUME)) { 1717 M_ASSERTPKTHDR(fl->m0); 1718 MPASS(fl->m0->m_pkthdr.len == total); 1719 MPASS(fl->remaining < total); 1720 1721 m0 = fl->m0; 1722 pnext = fl->pnext; 1723 remaining = fl->remaining; 1724 fl->flags &= ~FL_BUF_RESUME; 1725 goto get_segment; 1726 } 1727 1728 if (fl->rx_offset > 0 && len_newbuf & F_RSPD_NEWBUF) { 1729 fl->rx_offset = 0; 1730 if (__predict_false(++fl->cidx % 8 == 0)) { 1731 uint16_t cidx = fl->cidx / 8; 1732 1733 if (__predict_false(cidx == fl->sidx)) 1734 fl->cidx = cidx = 0; 1735 fl->hw_cidx = cidx; 1736 } 1737 } 1738 1739 /* 1740 * Payload starts at rx_offset in the current hw buffer. Its length is 1741 * 'len' and it may span multiple hw buffers. 
1742 */ 1743 1744 m0 = get_scatter_segment(sc, fl, 0, total); 1745 if (m0 == NULL) 1746 return (NULL); 1747 remaining = total - m0->m_len; 1748 pnext = &m0->m_next; 1749 while (remaining > 0) { 1750get_segment: 1751 MPASS(fl->rx_offset == 0); 1752 m = get_scatter_segment(sc, fl, total - remaining, remaining); 1753 if (__predict_false(m == NULL)) { 1754 fl->m0 = m0; 1755 fl->pnext = pnext; 1756 fl->remaining = remaining; 1757 fl->flags |= FL_BUF_RESUME; 1758 return (NULL); 1759 } 1760 *pnext = m; 1761 pnext = &m->m_next; 1762 remaining -= m->m_len; 1763 } 1764 *pnext = NULL; 1765 1766 M_ASSERTPKTHDR(m0); 1767 return (m0); 1768} 1769 1770static int 1771t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0) 1772{ 1773 struct sge_rxq *rxq = iq_to_rxq(iq); 1774 struct ifnet *ifp = rxq->ifp; 1775 struct adapter *sc = iq->adapter; 1776 const struct cpl_rx_pkt *cpl = (const void *)(rss + 1); 1777#if defined(INET) || defined(INET6) 1778 struct lro_ctrl *lro = &rxq->lro; 1779#endif 1780 static const int sw_hashtype[4][2] = { 1781 {M_HASHTYPE_NONE, M_HASHTYPE_NONE}, 1782 {M_HASHTYPE_RSS_IPV4, M_HASHTYPE_RSS_IPV6}, 1783 {M_HASHTYPE_RSS_TCP_IPV4, M_HASHTYPE_RSS_TCP_IPV6}, 1784 {M_HASHTYPE_RSS_UDP_IPV4, M_HASHTYPE_RSS_UDP_IPV6}, 1785 }; 1786 1787 KASSERT(m0 != NULL, ("%s: no payload with opcode %02x", __func__, 1788 rss->opcode)); 1789 1790 m0->m_pkthdr.len -= sc->params.sge.fl_pktshift; 1791 m0->m_len -= sc->params.sge.fl_pktshift; 1792 m0->m_data += sc->params.sge.fl_pktshift; 1793 1794 m0->m_pkthdr.rcvif = ifp; 1795 M_HASHTYPE_SET(m0, sw_hashtype[rss->hash_type][rss->ipv6]); 1796 m0->m_pkthdr.flowid = be32toh(rss->hash_val); 1797 1798 if (cpl->csum_calc && !cpl->err_vec) { 1799 if (ifp->if_capenable & IFCAP_RXCSUM && 1800 cpl->l2info & htobe32(F_RXF_IP)) { 1801 m0->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | 1802 CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 1803 rxq->rxcsum++; 1804 } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 && 1805 cpl->l2info & htobe32(F_RXF_IP6)) { 1806 m0->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 | 1807 CSUM_PSEUDO_HDR); 1808 rxq->rxcsum++; 1809 } 1810 1811 if (__predict_false(cpl->ip_frag)) 1812 m0->m_pkthdr.csum_data = be16toh(cpl->csum); 1813 else 1814 m0->m_pkthdr.csum_data = 0xffff; 1815 } 1816 1817 if (cpl->vlan_ex) { 1818 m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan); 1819 m0->m_flags |= M_VLANTAG; 1820 rxq->vlan_extraction++; 1821 } 1822 1823#if defined(INET) || defined(INET6) 1824 if (cpl->l2info & htobe32(F_RXF_LRO) && 1825 iq->flags & IQ_LRO_ENABLED && 1826 tcp_lro_rx(lro, m0, 0) == 0) { 1827 /* queued for LRO */ 1828 } else 1829#endif 1830 ifp->if_input(ifp, m0); 1831 1832 return (0); 1833} 1834 1835/* 1836 * Must drain the wrq or make sure that someone else will. 
1837 */ 1838static void 1839wrq_tx_drain(void *arg, int n) 1840{ 1841 struct sge_wrq *wrq = arg; 1842 struct sge_eq *eq = &wrq->eq; 1843 1844 EQ_LOCK(eq); 1845 if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) 1846 drain_wrq_wr_list(wrq->adapter, wrq); 1847 EQ_UNLOCK(eq); 1848} 1849 1850static void 1851drain_wrq_wr_list(struct adapter *sc, struct sge_wrq *wrq) 1852{ 1853 struct sge_eq *eq = &wrq->eq; 1854 u_int available, dbdiff; /* # of hardware descriptors */ 1855 u_int n; 1856 struct wrqe *wr; 1857 struct fw_eth_tx_pkt_wr *dst; /* any fw WR struct will do */ 1858 1859 EQ_LOCK_ASSERT_OWNED(eq); 1860 MPASS(TAILQ_EMPTY(&wrq->incomplete_wrs)); 1861 wr = STAILQ_FIRST(&wrq->wr_list); 1862 MPASS(wr != NULL); /* Must be called with something useful to do */ 1863 MPASS(eq->pidx == eq->dbidx); 1864 dbdiff = 0; 1865 1866 do { 1867 eq->cidx = read_hw_cidx(eq); 1868 if (eq->pidx == eq->cidx) 1869 available = eq->sidx - 1; 1870 else 1871 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 1872 1873 MPASS(wr->wrq == wrq); 1874 n = howmany(wr->wr_len, EQ_ESIZE); 1875 if (available < n) 1876 break; 1877 1878 dst = (void *)&eq->desc[eq->pidx]; 1879 if (__predict_true(eq->sidx - eq->pidx > n)) { 1880 /* Won't wrap, won't end exactly at the status page. */ 1881 bcopy(&wr->wr[0], dst, wr->wr_len); 1882 eq->pidx += n; 1883 } else { 1884 int first_portion = (eq->sidx - eq->pidx) * EQ_ESIZE; 1885 1886 bcopy(&wr->wr[0], dst, first_portion); 1887 if (wr->wr_len > first_portion) { 1888 bcopy(&wr->wr[first_portion], &eq->desc[0], 1889 wr->wr_len - first_portion); 1890 } 1891 eq->pidx = n - (eq->sidx - eq->pidx); 1892 } 1893 1894 if (available < eq->sidx / 4 && 1895 atomic_cmpset_int(&eq->equiq, 0, 1)) { 1896 dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | 1897 F_FW_WR_EQUEQ); 1898 eq->equeqidx = eq->pidx; 1899 } else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 32) { 1900 dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ); 1901 eq->equeqidx = eq->pidx; 1902 } 1903 1904 dbdiff += n; 1905 if (dbdiff >= 16) { 1906 ring_eq_db(sc, eq, dbdiff); 1907 dbdiff = 0; 1908 } 1909 1910 STAILQ_REMOVE_HEAD(&wrq->wr_list, link); 1911 free_wrqe(wr); 1912 MPASS(wrq->nwr_pending > 0); 1913 wrq->nwr_pending--; 1914 MPASS(wrq->ndesc_needed >= n); 1915 wrq->ndesc_needed -= n; 1916 } while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL); 1917 1918 if (dbdiff) 1919 ring_eq_db(sc, eq, dbdiff); 1920} 1921 1922/* 1923 * Doesn't fail. Holds on to work requests it can't send right away. 1924 */ 1925void 1926t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct wrqe *wr) 1927{ 1928#ifdef INVARIANTS 1929 struct sge_eq *eq = &wrq->eq; 1930#endif 1931 1932 EQ_LOCK_ASSERT_OWNED(eq); 1933 MPASS(wr != NULL); 1934 MPASS(wr->wr_len > 0 && wr->wr_len <= SGE_MAX_WR_LEN); 1935 MPASS((wr->wr_len & 0x7) == 0); 1936 1937 STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link); 1938 wrq->nwr_pending++; 1939 wrq->ndesc_needed += howmany(wr->wr_len, EQ_ESIZE); 1940 1941 if (!TAILQ_EMPTY(&wrq->incomplete_wrs)) 1942 return; /* commit_wrq_wr will drain wr_list as well. */ 1943 1944 drain_wrq_wr_list(sc, wrq); 1945 1946 /* Doorbell must have caught up to the pidx. 
*/ 1947 MPASS(eq->pidx == eq->dbidx); 1948} 1949 1950void 1951t4_update_fl_bufsize(struct ifnet *ifp) 1952{ 1953 struct vi_info *vi = ifp->if_softc; 1954 struct adapter *sc = vi->pi->adapter; 1955 struct sge_rxq *rxq; 1956#ifdef TCP_OFFLOAD 1957 struct sge_ofld_rxq *ofld_rxq; 1958#endif 1959 struct sge_fl *fl; 1960 int i, maxp, mtu = ifp->if_mtu; 1961 1962 maxp = mtu_to_max_payload(sc, mtu, 0); 1963 for_each_rxq(vi, i, rxq) { 1964 fl = &rxq->fl; 1965 1966 FL_LOCK(fl); 1967 find_best_refill_source(sc, fl, maxp); 1968 FL_UNLOCK(fl); 1969 } 1970#ifdef TCP_OFFLOAD 1971 maxp = mtu_to_max_payload(sc, mtu, 1); 1972 for_each_ofld_rxq(vi, i, ofld_rxq) { 1973 fl = &ofld_rxq->fl; 1974 1975 FL_LOCK(fl); 1976 find_best_refill_source(sc, fl, maxp); 1977 FL_UNLOCK(fl); 1978 } 1979#endif 1980} 1981 1982static inline int 1983mbuf_nsegs(struct mbuf *m) 1984{ 1985 1986 M_ASSERTPKTHDR(m); 1987 KASSERT(m->m_pkthdr.l5hlen > 0, 1988 ("%s: mbuf %p missing information on # of segments.", __func__, m)); 1989 1990 return (m->m_pkthdr.l5hlen); 1991} 1992 1993static inline void 1994set_mbuf_nsegs(struct mbuf *m, uint8_t nsegs) 1995{ 1996 1997 M_ASSERTPKTHDR(m); 1998 m->m_pkthdr.l5hlen = nsegs; 1999} 2000 2001static inline int 2002mbuf_len16(struct mbuf *m) 2003{ 2004 int n; 2005 2006 M_ASSERTPKTHDR(m); 2007 n = m->m_pkthdr.PH_loc.eight[0]; 2008 MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16); 2009 2010 return (n); 2011} 2012 2013static inline void 2014set_mbuf_len16(struct mbuf *m, uint8_t len16) 2015{ 2016 2017 M_ASSERTPKTHDR(m); 2018 m->m_pkthdr.PH_loc.eight[0] = len16; 2019} 2020 2021static inline int 2022needs_tso(struct mbuf *m) 2023{ 2024 2025 M_ASSERTPKTHDR(m); 2026 2027 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 2028 KASSERT(m->m_pkthdr.tso_segsz > 0, 2029 ("%s: TSO requested in mbuf %p but MSS not provided", 2030 __func__, m)); 2031 return (1); 2032 } 2033 2034 return (0); 2035} 2036 2037static inline int 2038needs_l3_csum(struct mbuf *m) 2039{ 2040 2041 M_ASSERTPKTHDR(m); 2042 2043 if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) 2044 return (1); 2045 return (0); 2046} 2047 2048static inline int 2049needs_l4_csum(struct mbuf *m) 2050{ 2051 2052 M_ASSERTPKTHDR(m); 2053 2054 if (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | 2055 CSUM_TCP_IPV6 | CSUM_TSO)) 2056 return (1); 2057 return (0); 2058} 2059 2060static inline int 2061needs_vlan_insertion(struct mbuf *m) 2062{ 2063 2064 M_ASSERTPKTHDR(m); 2065 2066 if (m->m_flags & M_VLANTAG) { 2067 KASSERT(m->m_pkthdr.ether_vtag != 0, 2068 ("%s: HWVLAN requested in mbuf %p but tag not provided", 2069 __func__, m)); 2070 return (1); 2071 } 2072 return (0); 2073} 2074 2075static void * 2076m_advance(struct mbuf **pm, int *poffset, int len) 2077{ 2078 struct mbuf *m = *pm; 2079 int offset = *poffset; 2080 uintptr_t p = 0; 2081 2082 MPASS(len > 0); 2083 2084 while (len) { 2085 if (offset + len < m->m_len) { 2086 offset += len; 2087 p = mtod(m, uintptr_t) + offset; 2088 break; 2089 } 2090 len -= m->m_len - offset; 2091 m = m->m_next; 2092 offset = 0; 2093 MPASS(m != NULL); 2094 } 2095 *poffset = offset; 2096 *pm = m; 2097 return ((void *)p); 2098} 2099 2100static inline int 2101same_paddr(char *a, char *b) 2102{ 2103 2104 if (a == b) 2105 return (1); 2106 else if (a != NULL && b != NULL) { 2107 vm_offset_t x = (vm_offset_t)a; 2108 vm_offset_t y = (vm_offset_t)b; 2109 2110 if ((x & PAGE_MASK) == (y & PAGE_MASK) && 2111 pmap_kextract(x) == pmap_kextract(y)) 2112 return (1); 2113 } 2114 2115 return (0); 2116} 2117 2118/* 2119 * Can deal with empty mbufs in the chain 
that have m_len = 0, but the chain 2120 * must have at least one mbuf that's not empty. 2121 */ 2122static inline int 2123count_mbuf_nsegs(struct mbuf *m) 2124{ 2125 char *prev_end, *start; 2126 int len, nsegs; 2127 2128 MPASS(m != NULL); 2129 2130 nsegs = 0; 2131 prev_end = NULL; 2132 for (; m; m = m->m_next) { 2133 2134 len = m->m_len; 2135 if (__predict_false(len == 0)) 2136 continue; 2137 start = mtod(m, char *); 2138 2139 nsegs += sglist_count(start, len); 2140 if (same_paddr(prev_end, start)) 2141 nsegs--; 2142 prev_end = start + len; 2143 } 2144 2145 MPASS(nsegs > 0); 2146 return (nsegs); 2147} 2148 2149/* 2150 * Analyze the mbuf to determine its tx needs. The mbuf passed in may change: 2151 * a) caller can assume it's been freed if this function returns with an error. 2152 * b) it may get defragged up if the gather list is too long for the hardware. 2153 */ 2154int 2155parse_pkt(struct adapter *sc, struct mbuf **mp) 2156{ 2157 struct mbuf *m0 = *mp, *m; 2158 int rc, nsegs, defragged = 0, offset; 2159 struct ether_header *eh; 2160 void *l3hdr; 2161#if defined(INET) || defined(INET6) 2162 struct tcphdr *tcp; 2163#endif 2164 uint16_t eh_type; 2165 2166 M_ASSERTPKTHDR(m0); 2167 if (__predict_false(m0->m_pkthdr.len < ETHER_HDR_LEN)) { 2168 rc = EINVAL; 2169fail: 2170 m_freem(m0); 2171 *mp = NULL; 2172 return (rc); 2173 } 2174restart: 2175 /* 2176 * First count the number of gather list segments in the payload. 2177 * Defrag the mbuf if nsegs exceeds the hardware limit. 2178 */ 2179 M_ASSERTPKTHDR(m0); 2180 MPASS(m0->m_pkthdr.len > 0); 2181 nsegs = count_mbuf_nsegs(m0); 2182 if (nsegs > (needs_tso(m0) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS)) { 2183 if (defragged++ > 0 || (m = m_defrag(m0, M_NOWAIT)) == NULL) { 2184 rc = EFBIG; 2185 goto fail; 2186 } 2187 *mp = m0 = m; /* update caller's copy after defrag */ 2188 goto restart; 2189 } 2190 2191 if (__predict_false(nsegs > 2 && m0->m_pkthdr.len <= MHLEN)) { 2192 m0 = m_pullup(m0, m0->m_pkthdr.len); 2193 if (m0 == NULL) { 2194 /* Should have left well enough alone. */ 2195 rc = EFBIG; 2196 goto fail; 2197 } 2198 *mp = m0; /* update caller's copy after pullup */ 2199 goto restart; 2200 } 2201 set_mbuf_nsegs(m0, nsegs); 2202 if (sc->flags & IS_VF) 2203 set_mbuf_len16(m0, txpkt_vm_len16(nsegs, needs_tso(m0))); 2204 else 2205 set_mbuf_len16(m0, txpkt_len16(nsegs, needs_tso(m0))); 2206 2207 if (!needs_tso(m0) && 2208 !(sc->flags & IS_VF && (needs_l3_csum(m0) || needs_l4_csum(m0)))) 2209 return (0); 2210 2211 m = m0; 2212 eh = mtod(m, struct ether_header *); 2213 eh_type = ntohs(eh->ether_type); 2214 if (eh_type == ETHERTYPE_VLAN) { 2215 struct ether_vlan_header *evh = (void *)eh; 2216 2217 eh_type = ntohs(evh->evl_proto); 2218 m0->m_pkthdr.l2hlen = sizeof(*evh); 2219 } else 2220 m0->m_pkthdr.l2hlen = sizeof(*eh); 2221 2222 offset = 0; 2223 l3hdr = m_advance(&m, &offset, m0->m_pkthdr.l2hlen); 2224 2225 switch (eh_type) { 2226#ifdef INET6 2227 case ETHERTYPE_IPV6: 2228 { 2229 struct ip6_hdr *ip6 = l3hdr; 2230 2231 MPASS(!needs_tso(m0) || ip6->ip6_nxt == IPPROTO_TCP); 2232 2233 m0->m_pkthdr.l3hlen = sizeof(*ip6); 2234 break; 2235 } 2236#endif 2237#ifdef INET 2238 case ETHERTYPE_IP: 2239 { 2240 struct ip *ip = l3hdr; 2241 2242 m0->m_pkthdr.l3hlen = ip->ip_hl * 4; 2243 break; 2244 } 2245#endif 2246 default: 2247 panic("%s: ethertype 0x%04x unknown. 
if_cxgbe must be compiled" 2248 " with the same INET/INET6 options as the kernel.", 2249 __func__, eh_type); 2250 } 2251 2252#if defined(INET) || defined(INET6) 2253 if (needs_tso(m0)) { 2254 tcp = m_advance(&m, &offset, m0->m_pkthdr.l3hlen); 2255 m0->m_pkthdr.l4hlen = tcp->th_off * 4; 2256 } 2257#endif 2258 MPASS(m0 == *mp); 2259 return (0); 2260} 2261 2262void * 2263start_wrq_wr(struct sge_wrq *wrq, int len16, struct wrq_cookie *cookie) 2264{ 2265 struct sge_eq *eq = &wrq->eq; 2266 struct adapter *sc = wrq->adapter; 2267 int ndesc, available; 2268 struct wrqe *wr; 2269 void *w; 2270 2271 MPASS(len16 > 0); 2272 ndesc = howmany(len16, EQ_ESIZE / 16); 2273 MPASS(ndesc > 0 && ndesc <= SGE_MAX_WR_NDESC); 2274 2275 EQ_LOCK(eq); 2276 2277 if (!STAILQ_EMPTY(&wrq->wr_list)) 2278 drain_wrq_wr_list(sc, wrq); 2279 2280 if (!STAILQ_EMPTY(&wrq->wr_list)) { 2281slowpath: 2282 EQ_UNLOCK(eq); 2283 wr = alloc_wrqe(len16 * 16, wrq); 2284 if (__predict_false(wr == NULL)) 2285 return (NULL); 2286 cookie->pidx = -1; 2287 cookie->ndesc = ndesc; 2288 return (&wr->wr); 2289 } 2290 2291 eq->cidx = read_hw_cidx(eq); 2292 if (eq->pidx == eq->cidx) 2293 available = eq->sidx - 1; 2294 else 2295 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 2296 if (available < ndesc) 2297 goto slowpath; 2298 2299 cookie->pidx = eq->pidx; 2300 cookie->ndesc = ndesc; 2301 TAILQ_INSERT_TAIL(&wrq->incomplete_wrs, cookie, link); 2302 2303 w = &eq->desc[eq->pidx]; 2304 IDXINCR(eq->pidx, ndesc, eq->sidx); 2305 if (__predict_false(eq->pidx < ndesc - 1)) { 2306 w = &wrq->ss[0]; 2307 wrq->ss_pidx = cookie->pidx; 2308 wrq->ss_len = len16 * 16; 2309 } 2310 2311 EQ_UNLOCK(eq); 2312 2313 return (w); 2314} 2315 2316void 2317commit_wrq_wr(struct sge_wrq *wrq, void *w, struct wrq_cookie *cookie) 2318{ 2319 struct sge_eq *eq = &wrq->eq; 2320 struct adapter *sc = wrq->adapter; 2321 int ndesc, pidx; 2322 struct wrq_cookie *prev, *next; 2323 2324 if (cookie->pidx == -1) { 2325 struct wrqe *wr = __containerof(w, struct wrqe, wr); 2326 2327 t4_wrq_tx(sc, wr); 2328 return; 2329 } 2330 2331 ndesc = cookie->ndesc; /* Can be more than SGE_MAX_WR_NDESC here. */ 2332 pidx = cookie->pidx; 2333 MPASS(pidx >= 0 && pidx < eq->sidx); 2334 if (__predict_false(w == &wrq->ss[0])) { 2335 int n = (eq->sidx - wrq->ss_pidx) * EQ_ESIZE; 2336 2337 MPASS(wrq->ss_len > n); /* WR had better wrap around. */ 2338 bcopy(&wrq->ss[0], &eq->desc[wrq->ss_pidx], n); 2339 bcopy(&wrq->ss[n], &eq->desc[0], wrq->ss_len - n); 2340 wrq->tx_wrs_ss++; 2341 } else 2342 wrq->tx_wrs_direct++; 2343 2344 EQ_LOCK(eq); 2345 prev = TAILQ_PREV(cookie, wrq_incomplete_wrs, link); 2346 next = TAILQ_NEXT(cookie, link); 2347 if (prev == NULL) { 2348 MPASS(pidx == eq->dbidx); 2349 if (next == NULL || ndesc >= 16) 2350 ring_eq_db(wrq->adapter, eq, ndesc); 2351 else { 2352 MPASS(IDXDIFF(next->pidx, pidx, eq->sidx) == ndesc); 2353 next->pidx = pidx; 2354 next->ndesc += ndesc; 2355 } 2356 } else { 2357 MPASS(IDXDIFF(pidx, prev->pidx, eq->sidx) == prev->ndesc); 2358 prev->ndesc += ndesc; 2359 } 2360 TAILQ_REMOVE(&wrq->incomplete_wrs, cookie, link); 2361 2362 if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) 2363 drain_wrq_wr_list(sc, wrq); 2364 2365#ifdef INVARIANTS 2366 if (TAILQ_EMPTY(&wrq->incomplete_wrs)) { 2367 /* Doorbell must have caught up to the pidx. 
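			 * try_txpkts has opened a coalesced txpkts work request
			 * holding the first two frames; the loop below keeps
			 * appending frames with add_to_txpkts until one no
			 * longer fits or there is no more work in the ring.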
*/ 2368 MPASS(wrq->eq.pidx == wrq->eq.dbidx); 2369 } 2370#endif 2371 EQ_UNLOCK(eq); 2372} 2373 2374static u_int 2375can_resume_eth_tx(struct mp_ring *r) 2376{ 2377 struct sge_eq *eq = r->cookie; 2378 2379 return (total_available_tx_desc(eq) > eq->sidx / 8); 2380} 2381 2382static inline int 2383cannot_use_txpkts(struct mbuf *m) 2384{ 2385 /* maybe put a GL limit too, to avoid silliness? */ 2386 2387 return (needs_tso(m)); 2388} 2389 2390/* 2391 * r->items[cidx] to r->items[pidx], with a wraparound at r->size, are ready to 2392 * be consumed. Return the actual number consumed. 0 indicates a stall. 2393 */ 2394static u_int 2395eth_tx(struct mp_ring *r, u_int cidx, u_int pidx) 2396{ 2397 struct sge_txq *txq = r->cookie; 2398 struct sge_eq *eq = &txq->eq; 2399 struct ifnet *ifp = txq->ifp; 2400 struct vi_info *vi = ifp->if_softc; 2401 struct port_info *pi = vi->pi; 2402 struct adapter *sc = pi->adapter; 2403 u_int total, remaining; /* # of packets */ 2404 u_int available, dbdiff; /* # of hardware descriptors */ 2405 u_int n, next_cidx; 2406 struct mbuf *m0, *tail; 2407 struct txpkts txp; 2408 struct fw_eth_tx_pkts_wr *wr; /* any fw WR struct will do */ 2409 2410 remaining = IDXDIFF(pidx, cidx, r->size); 2411 MPASS(remaining > 0); /* Must not be called without work to do. */ 2412 total = 0; 2413 2414 TXQ_LOCK(txq); 2415 if (__predict_false((eq->flags & EQ_ENABLED) == 0)) { 2416 while (cidx != pidx) { 2417 m0 = r->items[cidx]; 2418 m_freem(m0); 2419 if (++cidx == r->size) 2420 cidx = 0; 2421 } 2422 reclaim_tx_descs(txq, 2048); 2423 total = remaining; 2424 goto done; 2425 } 2426 2427 /* How many hardware descriptors do we have readily available. */ 2428 if (eq->pidx == eq->cidx) 2429 available = eq->sidx - 1; 2430 else 2431 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 2432 dbdiff = IDXDIFF(eq->pidx, eq->dbidx, eq->sidx); 2433 2434 while (remaining > 0) { 2435 2436 m0 = r->items[cidx]; 2437 M_ASSERTPKTHDR(m0); 2438 MPASS(m0->m_nextpkt == NULL); 2439 2440 if (available < SGE_MAX_WR_NDESC) { 2441 available += reclaim_tx_descs(txq, 64); 2442 if (available < howmany(mbuf_len16(m0), EQ_ESIZE / 16)) 2443 break; /* out of descriptors */ 2444 } 2445 2446 next_cidx = cidx + 1; 2447 if (__predict_false(next_cidx == r->size)) 2448 next_cidx = 0; 2449 2450 wr = (void *)&eq->desc[eq->pidx]; 2451 if (sc->flags & IS_VF) { 2452 total++; 2453 remaining--; 2454 ETHER_BPF_MTAP(ifp, m0); 2455 n = write_txpkt_vm_wr(txq, (void *)wr, m0, available); 2456 } else if (remaining > 1 && 2457 try_txpkts(m0, r->items[next_cidx], &txp, available) == 0) { 2458 2459 /* pkts at cidx, next_cidx should both be in txp. 
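		 * A WR whose descriptors could not be doorbelled right away
		 * hands them to the next incomplete WR (next->ndesc above), so
		 * once the list is empty every committed descriptor has been
		 * covered by a doorbell.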
*/ 2460 MPASS(txp.npkt == 2); 2461 tail = r->items[next_cidx]; 2462 MPASS(tail->m_nextpkt == NULL); 2463 ETHER_BPF_MTAP(ifp, m0); 2464 ETHER_BPF_MTAP(ifp, tail); 2465 m0->m_nextpkt = tail; 2466 2467 if (__predict_false(++next_cidx == r->size)) 2468 next_cidx = 0; 2469 2470 while (next_cidx != pidx) { 2471 if (add_to_txpkts(r->items[next_cidx], &txp, 2472 available) != 0) 2473 break; 2474 tail->m_nextpkt = r->items[next_cidx]; 2475 tail = tail->m_nextpkt; 2476 ETHER_BPF_MTAP(ifp, tail); 2477 if (__predict_false(++next_cidx == r->size)) 2478 next_cidx = 0; 2479 } 2480 2481 n = write_txpkts_wr(txq, wr, m0, &txp, available); 2482 total += txp.npkt; 2483 remaining -= txp.npkt; 2484 } else { 2485 total++; 2486 remaining--; 2487 ETHER_BPF_MTAP(ifp, m0); 2488 n = write_txpkt_wr(txq, (void *)wr, m0, available); 2489 } 2490 MPASS(n >= 1 && n <= available && n <= SGE_MAX_WR_NDESC); 2491 2492 available -= n; 2493 dbdiff += n; 2494 IDXINCR(eq->pidx, n, eq->sidx); 2495 2496 if (total_available_tx_desc(eq) < eq->sidx / 4 && 2497 atomic_cmpset_int(&eq->equiq, 0, 1)) { 2498 wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | 2499 F_FW_WR_EQUEQ); 2500 eq->equeqidx = eq->pidx; 2501 } else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 32) { 2502 wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ); 2503 eq->equeqidx = eq->pidx; 2504 } 2505 2506 if (dbdiff >= 16 && remaining >= 4) { 2507 ring_eq_db(sc, eq, dbdiff); 2508 available += reclaim_tx_descs(txq, 4 * dbdiff); 2509 dbdiff = 0; 2510 } 2511 2512 cidx = next_cidx; 2513 } 2514 if (dbdiff != 0) { 2515 ring_eq_db(sc, eq, dbdiff); 2516 reclaim_tx_descs(txq, 32); 2517 } 2518done: 2519 TXQ_UNLOCK(txq); 2520 2521 return (total); 2522} 2523 2524static inline void 2525init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx, 2526 int qsize) 2527{ 2528 2529 KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS, 2530 ("%s: bad tmr_idx %d", __func__, tmr_idx)); 2531 KASSERT(pktc_idx < SGE_NCOUNTERS, /* -ve is ok, means don't use */ 2532 ("%s: bad pktc_idx %d", __func__, pktc_idx)); 2533 2534 iq->flags = 0; 2535 iq->adapter = sc; 2536 iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx); 2537 iq->intr_pktc_idx = SGE_NCOUNTERS - 1; 2538 if (pktc_idx >= 0) { 2539 iq->intr_params |= F_QINTR_CNT_EN; 2540 iq->intr_pktc_idx = pktc_idx; 2541 } 2542 iq->qsize = roundup2(qsize, 16); /* See FW_IQ_CMD/iqsize */ 2543 iq->sidx = iq->qsize - sc->params.sge.spg_len / IQ_ESIZE; 2544} 2545 2546static inline void 2547init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int maxp, char *name) 2548{ 2549 2550 fl->qsize = qsize; 2551 fl->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE; 2552 strlcpy(fl->lockname, name, sizeof(fl->lockname)); 2553 if (sc->flags & BUF_PACKING_OK && 2554 ((!is_t4(sc) && buffer_packing) || /* T5+: enabled unless 0 */ 2555 (is_t4(sc) && buffer_packing == 1)))/* T4: disabled unless 1 */ 2556 fl->flags |= FL_BUF_PACKING; 2557 find_best_refill_source(sc, fl, maxp); 2558 find_safe_refill_source(sc, fl); 2559} 2560 2561static inline void 2562init_eq(struct adapter *sc, struct sge_eq *eq, int eqtype, int qsize, 2563 uint8_t tx_chan, uint16_t iqid, char *name) 2564{ 2565 KASSERT(eqtype <= EQ_TYPEMASK, ("%s: bad qtype %d", __func__, eqtype)); 2566 2567 eq->flags = eqtype & EQ_TYPEMASK; 2568 eq->tx_chan = tx_chan; 2569 eq->iqid = iqid; 2570 eq->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE; 2571 strlcpy(eq->lockname, name, sizeof(eq->lockname)); 2572} 2573 2574static int 2575alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag, 2576 bus_dmamap_t *map, bus_addr_t 
*pa, void **va) 2577{ 2578 int rc; 2579 2580 rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR, 2581 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag); 2582 if (rc != 0) { 2583 device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc); 2584 goto done; 2585 } 2586 2587 rc = bus_dmamem_alloc(*tag, va, 2588 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map); 2589 if (rc != 0) { 2590 device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc); 2591 goto done; 2592 } 2593 2594 rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0); 2595 if (rc != 0) { 2596 device_printf(sc->dev, "cannot load DMA map: %d\n", rc); 2597 goto done; 2598 } 2599done: 2600 if (rc) 2601 free_ring(sc, *tag, *map, *pa, *va); 2602 2603 return (rc); 2604} 2605 2606static int 2607free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map, 2608 bus_addr_t pa, void *va) 2609{ 2610 if (pa) 2611 bus_dmamap_unload(tag, map); 2612 if (va) 2613 bus_dmamem_free(tag, va, map); 2614 if (tag) 2615 bus_dma_tag_destroy(tag); 2616 2617 return (0); 2618} 2619 2620/* 2621 * Allocates the ring for an ingress queue and an optional freelist. If the 2622 * freelist is specified it will be allocated and then associated with the 2623 * ingress queue. 2624 * 2625 * Returns errno on failure. Resources allocated up to that point may still be 2626 * allocated. Caller is responsible for cleanup in case this function fails. 2627 * 2628 * If the ingress queue will take interrupts directly (iq->flags & IQ_INTR) then 2629 * the intr_idx specifies the vector, starting from 0. Otherwise it specifies 2630 * the abs_id of the ingress queue to which its interrupts should be forwarded. 2631 */ 2632static int 2633alloc_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl, 2634 int intr_idx, int cong) 2635{ 2636 int rc, i, cntxt_id; 2637 size_t len; 2638 struct fw_iq_cmd c; 2639 struct port_info *pi = vi->pi; 2640 struct adapter *sc = iq->adapter; 2641 struct sge_params *sp = &sc->params.sge; 2642 __be32 v = 0; 2643 2644 len = iq->qsize * IQ_ESIZE; 2645 rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba, 2646 (void **)&iq->desc); 2647 if (rc != 0) 2648 return (rc); 2649 2650 bzero(&c, sizeof(c)); 2651 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST | 2652 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) | 2653 V_FW_IQ_CMD_VFN(0)); 2654 2655 c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART | 2656 FW_LEN16(c)); 2657 2658 /* Special handling for firmware event queue */ 2659 if (iq == &sc->sge.fwq) 2660 v |= F_FW_IQ_CMD_IQASYNCH; 2661 2662 if (iq->flags & IQ_INTR) { 2663 KASSERT(intr_idx < sc->intr_count, 2664 ("%s: invalid direct intr_idx %d", __func__, intr_idx)); 2665 } else 2666 v |= F_FW_IQ_CMD_IQANDST; 2667 v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx); 2668 2669 c.type_to_iqandstindex = htobe32(v | 2670 V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | 2671 V_FW_IQ_CMD_VIID(vi->viid) | 2672 V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT)); 2673 c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) | 2674 F_FW_IQ_CMD_IQGTSMODE | 2675 V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) | 2676 V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4)); 2677 c.iqsize = htobe16(iq->qsize); 2678 c.iqaddr = htobe64(iq->ba); 2679 if (cong >= 0) 2680 c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN); 2681 2682 if (fl) { 2683 mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF); 2684 2685 len = fl->qsize * EQ_ESIZE; 2686 rc = alloc_ring(sc, len, &fl->desc_tag, 
&fl->desc_map, 2687 &fl->ba, (void **)&fl->desc); 2688 if (rc) 2689 return (rc); 2690 2691 /* Allocate space for one software descriptor per buffer. */ 2692 rc = alloc_fl_sdesc(fl); 2693 if (rc != 0) { 2694 device_printf(sc->dev, 2695 "failed to setup fl software descriptors: %d\n", 2696 rc); 2697 return (rc); 2698 } 2699 2700 if (fl->flags & FL_BUF_PACKING) { 2701 fl->lowat = roundup2(sp->fl_starve_threshold2, 8); 2702 fl->buf_boundary = sp->pack_boundary; 2703 } else { 2704 fl->lowat = roundup2(sp->fl_starve_threshold, 8); 2705 fl->buf_boundary = 16; 2706 } 2707 if (fl_pad && fl->buf_boundary < sp->pad_boundary) 2708 fl->buf_boundary = sp->pad_boundary; 2709 2710 c.iqns_to_fl0congen |= 2711 htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) | 2712 F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO | 2713 (fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) | 2714 (fl->flags & FL_BUF_PACKING ? F_FW_IQ_CMD_FL0PACKEN : 2715 0)); 2716 if (cong >= 0) { 2717 c.iqns_to_fl0congen |= 2718 htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) | 2719 F_FW_IQ_CMD_FL0CONGCIF | 2720 F_FW_IQ_CMD_FL0CONGEN); 2721 } 2722 c.fl0dcaen_to_fl0cidxfthresh = 2723 htobe16(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_128B) | 2724 V_FW_IQ_CMD_FL0FBMAX(X_FETCHBURSTMAX_512B)); 2725 c.fl0size = htobe16(fl->qsize); 2726 c.fl0addr = htobe64(fl->ba); 2727 } 2728 2729 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 2730 if (rc != 0) { 2731 device_printf(sc->dev, 2732 "failed to create ingress queue: %d\n", rc); 2733 return (rc); 2734 } 2735 2736 iq->cidx = 0; 2737 iq->gen = F_RSPD_GEN; 2738 iq->intr_next = iq->intr_params; 2739 iq->cntxt_id = be16toh(c.iqid); 2740 iq->abs_id = be16toh(c.physiqid); 2741 iq->flags |= IQ_ALLOCATED; 2742 2743 cntxt_id = iq->cntxt_id - sc->sge.iq_start; 2744 if (cntxt_id >= sc->sge.niq) { 2745 panic ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__, 2746 cntxt_id, sc->sge.niq - 1); 2747 } 2748 sc->sge.iqmap[cntxt_id] = iq; 2749 2750 if (fl) { 2751 u_int qid; 2752 2753 iq->flags |= IQ_HAS_FL; 2754 fl->cntxt_id = be16toh(c.fl0id); 2755 fl->pidx = fl->cidx = 0; 2756 2757 cntxt_id = fl->cntxt_id - sc->sge.eq_start; 2758 if (cntxt_id >= sc->sge.neq) { 2759 panic("%s: fl->cntxt_id (%d) more than the max (%d)", 2760 __func__, cntxt_id, sc->sge.neq - 1); 2761 } 2762 sc->sge.eqmap[cntxt_id] = (void *)fl; 2763 2764 qid = fl->cntxt_id; 2765 if (isset(&sc->doorbells, DOORBELL_UDB)) { 2766 uint32_t s_qpp = sc->params.sge.eq_s_qpp; 2767 uint32_t mask = (1 << s_qpp) - 1; 2768 volatile uint8_t *udb; 2769 2770 udb = sc->udbs_base + UDBS_DB_OFFSET; 2771 udb += (qid >> s_qpp) << PAGE_SHIFT; 2772 qid &= mask; 2773 if (qid < PAGE_SIZE / UDBS_SEG_SIZE) { 2774 udb += qid << UDBS_SEG_SHIFT; 2775 qid = 0; 2776 } 2777 fl->udb = (volatile void *)udb; 2778 } 2779 fl->dbval = V_QID(qid) | sc->chip_params->sge_fl_db; 2780 2781 FL_LOCK(fl); 2782 /* Enough to make sure the SGE doesn't think it's starved */ 2783 refill_fl(sc, fl, fl->lowat); 2784 FL_UNLOCK(fl); 2785 } 2786 2787 if (is_t5(sc) && !(sc->flags & IS_VF) && cong >= 0) { 2788 uint32_t param, val; 2789 2790 param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | 2791 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) | 2792 V_FW_PARAMS_PARAM_YZ(iq->cntxt_id); 2793 if (cong == 0) 2794 val = 1 << 19; 2795 else { 2796 val = 2 << 19; 2797 for (i = 0; i < 4; i++) { 2798 if (cong & (1 << i)) 2799 val |= 1 << (i << 2); 2800 } 2801 } 2802 2803 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); 2804 if (rc != 0) { 2805 /* report error but carry on */ 2806 device_printf(sc->dev, 2807 "failed to set 
congestion manager context for " 2808 "ingress queue %d: %d\n", iq->cntxt_id, rc); 2809 } 2810 } 2811 2812 /* Enable IQ interrupts */ 2813 atomic_store_rel_int(&iq->state, IQS_IDLE); 2814 t4_write_reg(sc, sc->sge_gts_reg, V_SEINTARM(iq->intr_params) | 2815 V_INGRESSQID(iq->cntxt_id)); 2816 2817 return (0); 2818} 2819 2820static int 2821free_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl) 2822{ 2823 int rc; 2824 struct adapter *sc = iq->adapter; 2825 device_t dev; 2826 2827 if (sc == NULL) 2828 return (0); /* nothing to do */ 2829 2830 dev = vi ? vi->dev : sc->dev; 2831 2832 if (iq->flags & IQ_ALLOCATED) { 2833 rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, 2834 FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id, 2835 fl ? fl->cntxt_id : 0xffff, 0xffff); 2836 if (rc != 0) { 2837 device_printf(dev, 2838 "failed to free queue %p: %d\n", iq, rc); 2839 return (rc); 2840 } 2841 iq->flags &= ~IQ_ALLOCATED; 2842 } 2843 2844 free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc); 2845 2846 bzero(iq, sizeof(*iq)); 2847 2848 if (fl) { 2849 free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba, 2850 fl->desc); 2851 2852 if (fl->sdesc) 2853 free_fl_sdesc(sc, fl); 2854 2855 if (mtx_initialized(&fl->fl_lock)) 2856 mtx_destroy(&fl->fl_lock); 2857 2858 bzero(fl, sizeof(*fl)); 2859 } 2860 2861 return (0); 2862} 2863 2864static void 2865add_fl_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, 2866 struct sge_fl *fl) 2867{ 2868 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 2869 2870 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", CTLFLAG_RD, NULL, 2871 "freelist"); 2872 children = SYSCTL_CHILDREN(oid); 2873 2874 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", 2875 CTLTYPE_INT | CTLFLAG_RD, &fl->cntxt_id, 0, sysctl_uint16, "I", 2876 "SGE context id of the freelist"); 2877 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "padding", CTLFLAG_RD, NULL, 2878 fl_pad ? 1 : 0, "padding enabled"); 2879 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "packing", CTLFLAG_RD, NULL, 2880 fl->flags & FL_BUF_PACKING ? 1 : 0, "packing enabled"); 2881 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &fl->cidx, 2882 0, "consumer index"); 2883 if (fl->flags & FL_BUF_PACKING) { 2884 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_offset", 2885 CTLFLAG_RD, &fl->rx_offset, 0, "packing rx offset"); 2886 } 2887 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &fl->pidx, 2888 0, "producer index"); 2889 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_allocated", 2890 CTLFLAG_RD, &fl->mbuf_allocated, "# of mbuf allocated"); 2891 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_inlined", 2892 CTLFLAG_RD, &fl->mbuf_inlined, "# of mbuf inlined in clusters"); 2893 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_allocated", 2894 CTLFLAG_RD, &fl->cl_allocated, "# of clusters allocated"); 2895 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_recycled", 2896 CTLFLAG_RD, &fl->cl_recycled, "# of clusters recycled"); 2897 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_fast_recycled", 2898 CTLFLAG_RD, &fl->cl_fast_recycled, "# of clusters recycled (fast)"); 2899} 2900 2901static int 2902alloc_fwq(struct adapter *sc) 2903{ 2904 int rc, intr_idx; 2905 struct sge_iq *fwq = &sc->sge.fwq; 2906 struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev); 2907 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 2908 2909 init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE); 2910 fwq->flags |= IQ_INTR; /* always */ 2911 if (sc->flags & IS_VF) 2912 intr_idx = 0; 2913 else { 2914 intr_idx = sc->intr_count > 1 ? 
1 : 0; 2915 fwq->set_tcb_rpl = t4_filter_rpl; 2916 fwq->l2t_write_rpl = do_l2t_write_rpl; 2917 } 2918 rc = alloc_iq_fl(&sc->port[0]->vi[0], fwq, NULL, intr_idx, -1); 2919 if (rc != 0) { 2920 device_printf(sc->dev, 2921 "failed to create firmware event queue: %d\n", rc); 2922 return (rc); 2923 } 2924 2925 oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "fwq", CTLFLAG_RD, 2926 NULL, "firmware event queue"); 2927 children = SYSCTL_CHILDREN(oid); 2928 2929 SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "abs_id", 2930 CTLTYPE_INT | CTLFLAG_RD, &fwq->abs_id, 0, sysctl_uint16, "I", 2931 "absolute id of the queue"); 2932 SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cntxt_id", 2933 CTLTYPE_INT | CTLFLAG_RD, &fwq->cntxt_id, 0, sysctl_uint16, "I", 2934 "SGE context id of the queue"); 2935 SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx", 2936 CTLTYPE_INT | CTLFLAG_RD, &fwq->cidx, 0, sysctl_uint16, "I", 2937 "consumer index"); 2938 2939 return (0); 2940} 2941 2942static int 2943free_fwq(struct adapter *sc) 2944{ 2945 return free_iq_fl(NULL, &sc->sge.fwq, NULL); 2946} 2947 2948static int 2949alloc_mgmtq(struct adapter *sc) 2950{ 2951 int rc; 2952 struct sge_wrq *mgmtq = &sc->sge.mgmtq; 2953 char name[16]; 2954 struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev); 2955 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 2956 2957 oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "mgmtq", CTLFLAG_RD, 2958 NULL, "management queue"); 2959 2960 snprintf(name, sizeof(name), "%s mgmtq", device_get_nameunit(sc->dev)); 2961 init_eq(sc, &mgmtq->eq, EQ_CTRL, CTRL_EQ_QSIZE, sc->port[0]->tx_chan, 2962 sc->sge.fwq.cntxt_id, name); 2963 rc = alloc_wrq(sc, NULL, mgmtq, oid); 2964 if (rc != 0) { 2965 device_printf(sc->dev, 2966 "failed to create management queue: %d\n", rc); 2967 return (rc); 2968 } 2969 2970 return (0); 2971} 2972 2973static int 2974free_mgmtq(struct adapter *sc) 2975{ 2976 2977 return free_wrq(sc, &sc->sge.mgmtq); 2978} 2979 2980int 2981tnl_cong(struct port_info *pi, int drop) 2982{ 2983 2984 if (drop == -1) 2985 return (-1); 2986 else if (drop == 1) 2987 return (0); 2988 else 2989 return (pi->rx_chan_map); 2990} 2991 2992static int 2993alloc_rxq(struct vi_info *vi, struct sge_rxq *rxq, int intr_idx, int idx, 2994 struct sysctl_oid *oid) 2995{ 2996 int rc; 2997 struct adapter *sc = vi->pi->adapter; 2998 struct sysctl_oid_list *children; 2999 char name[16]; 3000 3001 rc = alloc_iq_fl(vi, &rxq->iq, &rxq->fl, intr_idx, 3002 tnl_cong(vi->pi, cong_drop)); 3003 if (rc != 0) 3004 return (rc); 3005 3006 if (idx == 0) 3007 sc->sge.iq_base = rxq->iq.abs_id - rxq->iq.cntxt_id; 3008 else 3009 KASSERT(rxq->iq.cntxt_id + sc->sge.iq_base == rxq->iq.abs_id, 3010 ("iq_base mismatch")); 3011 KASSERT(sc->sge.iq_base == 0 || sc->flags & IS_VF, 3012 ("PF with non-zero iq_base")); 3013 3014 /* 3015 * The freelist is just barely above the starvation threshold right now, 3016 * fill it up a bit more. 
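	 * (alloc_iq_fl refilled it only up to fl->lowat; the extra 128 buffers
	 * requested below give the queue some headroom before traffic starts.)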
3017 */ 3018 FL_LOCK(&rxq->fl); 3019 refill_fl(sc, &rxq->fl, 128); 3020 FL_UNLOCK(&rxq->fl); 3021 3022#if defined(INET) || defined(INET6) 3023 rc = tcp_lro_init(&rxq->lro); 3024 if (rc != 0) 3025 return (rc); 3026 rxq->lro.ifp = vi->ifp; /* also indicates LRO init'ed */ 3027 3028 if (vi->ifp->if_capenable & IFCAP_LRO) 3029 rxq->iq.flags |= IQ_LRO_ENABLED; 3030#endif 3031 rxq->ifp = vi->ifp; 3032 3033 children = SYSCTL_CHILDREN(oid); 3034 3035 snprintf(name, sizeof(name), "%d", idx); 3036 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, 3037 NULL, "rx queue"); 3038 children = SYSCTL_CHILDREN(oid); 3039 3040 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "abs_id", 3041 CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.abs_id, 0, sysctl_uint16, "I", 3042 "absolute id of the queue"); 3043 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cntxt_id", 3044 CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cntxt_id, 0, sysctl_uint16, "I", 3045 "SGE context id of the queue"); 3046 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", 3047 CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cidx, 0, sysctl_uint16, "I", 3048 "consumer index"); 3049#if defined(INET) || defined(INET6) 3050 SYSCTL_ADD_U64(&vi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD, 3051 &rxq->lro.lro_queued, 0, NULL); 3052 SYSCTL_ADD_U64(&vi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD, 3053 &rxq->lro.lro_flushed, 0, NULL); 3054#endif 3055 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD, 3056 &rxq->rxcsum, "# of times hardware assisted with checksum"); 3057 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_extraction", 3058 CTLFLAG_RD, &rxq->vlan_extraction, 3059 "# of times hardware extracted 802.1Q tag"); 3060 3061 add_fl_sysctls(&vi->ctx, oid, &rxq->fl); 3062 3063 return (rc); 3064} 3065 3066static int 3067free_rxq(struct vi_info *vi, struct sge_rxq *rxq) 3068{ 3069 int rc; 3070 3071#if defined(INET) || defined(INET6) 3072 if (rxq->lro.ifp) { 3073 tcp_lro_free(&rxq->lro); 3074 rxq->lro.ifp = NULL; 3075 } 3076#endif 3077 3078 rc = free_iq_fl(vi, &rxq->iq, &rxq->fl); 3079 if (rc == 0) 3080 bzero(rxq, sizeof(*rxq)); 3081 3082 return (rc); 3083} 3084 3085#ifdef TCP_OFFLOAD 3086static int 3087alloc_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq, 3088 int intr_idx, int idx, struct sysctl_oid *oid) 3089{ 3090 int rc; 3091 struct sysctl_oid_list *children; 3092 char name[16]; 3093 3094 rc = alloc_iq_fl(vi, &ofld_rxq->iq, &ofld_rxq->fl, intr_idx, 3095 vi->pi->rx_chan_map); 3096 if (rc != 0) 3097 return (rc); 3098 3099 children = SYSCTL_CHILDREN(oid); 3100 3101 snprintf(name, sizeof(name), "%d", idx); 3102 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, 3103 NULL, "rx queue"); 3104 children = SYSCTL_CHILDREN(oid); 3105 3106 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "abs_id", 3107 CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.abs_id, 0, sysctl_uint16, 3108 "I", "absolute id of the queue"); 3109 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cntxt_id", 3110 CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cntxt_id, 0, sysctl_uint16, 3111 "I", "SGE context id of the queue"); 3112 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", 3113 CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cidx, 0, sysctl_uint16, "I", 3114 "consumer index"); 3115 3116 add_fl_sysctls(&vi->ctx, oid, &ofld_rxq->fl); 3117 3118 return (rc); 3119} 3120 3121static int 3122free_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq) 3123{ 3124 int rc; 3125 3126 rc = free_iq_fl(vi, &ofld_rxq->iq, &ofld_rxq->fl); 3127 if (rc == 0) 3128 
bzero(ofld_rxq, sizeof(*ofld_rxq)); 3129 3130 return (rc); 3131} 3132#endif 3133 3134#ifdef DEV_NETMAP 3135static int 3136alloc_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int intr_idx, 3137 int idx, struct sysctl_oid *oid) 3138{ 3139 int rc; 3140 struct sysctl_oid_list *children; 3141 struct sysctl_ctx_list *ctx; 3142 char name[16]; 3143 size_t len; 3144 struct adapter *sc = vi->pi->adapter; 3145 struct netmap_adapter *na = NA(vi->ifp); 3146 3147 MPASS(na != NULL); 3148 3149 len = vi->qsize_rxq * IQ_ESIZE; 3150 rc = alloc_ring(sc, len, &nm_rxq->iq_desc_tag, &nm_rxq->iq_desc_map, 3151 &nm_rxq->iq_ba, (void **)&nm_rxq->iq_desc); 3152 if (rc != 0) 3153 return (rc); 3154 3155 len = na->num_rx_desc * EQ_ESIZE + sc->params.sge.spg_len; 3156 rc = alloc_ring(sc, len, &nm_rxq->fl_desc_tag, &nm_rxq->fl_desc_map, 3157 &nm_rxq->fl_ba, (void **)&nm_rxq->fl_desc); 3158 if (rc != 0) 3159 return (rc); 3160 3161 nm_rxq->vi = vi; 3162 nm_rxq->nid = idx; 3163 nm_rxq->iq_cidx = 0; 3164 nm_rxq->iq_sidx = vi->qsize_rxq - sc->params.sge.spg_len / IQ_ESIZE; 3165 nm_rxq->iq_gen = F_RSPD_GEN; 3166 nm_rxq->fl_pidx = nm_rxq->fl_cidx = 0; 3167 nm_rxq->fl_sidx = na->num_rx_desc; 3168 nm_rxq->intr_idx = intr_idx; 3169 3170 ctx = &vi->ctx; 3171 children = SYSCTL_CHILDREN(oid); 3172 3173 snprintf(name, sizeof(name), "%d", idx); 3174 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, 3175 "rx queue"); 3176 children = SYSCTL_CHILDREN(oid); 3177 3178 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "abs_id", 3179 CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_abs_id, 0, sysctl_uint16, 3180 "I", "absolute id of the queue"); 3181 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", 3182 CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_cntxt_id, 0, sysctl_uint16, 3183 "I", "SGE context id of the queue"); 3184 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", 3185 CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_cidx, 0, sysctl_uint16, "I", 3186 "consumer index"); 3187 3188 children = SYSCTL_CHILDREN(oid); 3189 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", CTLFLAG_RD, NULL, 3190 "freelist"); 3191 children = SYSCTL_CHILDREN(oid); 3192 3193 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", 3194 CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->fl_cntxt_id, 0, sysctl_uint16, 3195 "I", "SGE context id of the freelist"); 3196 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, 3197 &nm_rxq->fl_cidx, 0, "consumer index"); 3198 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, 3199 &nm_rxq->fl_pidx, 0, "producer index"); 3200 3201 return (rc); 3202} 3203 3204 3205static int 3206free_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq) 3207{ 3208 struct adapter *sc = vi->pi->adapter; 3209 3210 free_ring(sc, nm_rxq->iq_desc_tag, nm_rxq->iq_desc_map, nm_rxq->iq_ba, 3211 nm_rxq->iq_desc); 3212 free_ring(sc, nm_rxq->fl_desc_tag, nm_rxq->fl_desc_map, nm_rxq->fl_ba, 3213 nm_rxq->fl_desc); 3214 3215 return (0); 3216} 3217 3218static int 3219alloc_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq, int iqidx, int idx, 3220 struct sysctl_oid *oid) 3221{ 3222 int rc; 3223 size_t len; 3224 struct port_info *pi = vi->pi; 3225 struct adapter *sc = pi->adapter; 3226 struct netmap_adapter *na = NA(vi->ifp); 3227 char name[16]; 3228 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 3229 3230 len = na->num_tx_desc * EQ_ESIZE + sc->params.sge.spg_len; 3231 rc = alloc_ring(sc, len, &nm_txq->desc_tag, &nm_txq->desc_map, 3232 &nm_txq->ba, (void **)&nm_txq->desc); 3233 if (rc) 3234 return (rc); 3235 3236 nm_txq->pidx = nm_txq->cidx = 0; 3237 
nm_txq->sidx = na->num_tx_desc; 3238 nm_txq->nid = idx; 3239 nm_txq->iqidx = iqidx; 3240 nm_txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | 3241 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_VF_VLD(1) | 3242 V_TXPKT_VF(vi->viid)); 3243 3244 snprintf(name, sizeof(name), "%d", idx); 3245 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, 3246 NULL, "netmap tx queue"); 3247 children = SYSCTL_CHILDREN(oid); 3248 3249 SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 3250 &nm_txq->cntxt_id, 0, "SGE context id of the queue"); 3251 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", 3252 CTLTYPE_INT | CTLFLAG_RD, &nm_txq->cidx, 0, sysctl_uint16, "I", 3253 "consumer index"); 3254 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "pidx", 3255 CTLTYPE_INT | CTLFLAG_RD, &nm_txq->pidx, 0, sysctl_uint16, "I", 3256 "producer index"); 3257 3258 return (rc); 3259} 3260 3261static int 3262free_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq) 3263{ 3264 struct adapter *sc = vi->pi->adapter; 3265 3266 free_ring(sc, nm_txq->desc_tag, nm_txq->desc_map, nm_txq->ba, 3267 nm_txq->desc); 3268 3269 return (0); 3270} 3271#endif 3272 3273static int 3274ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq) 3275{ 3276 int rc, cntxt_id; 3277 struct fw_eq_ctrl_cmd c; 3278 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 3279 3280 bzero(&c, sizeof(c)); 3281 3282 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST | 3283 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) | 3284 V_FW_EQ_CTRL_CMD_VFN(0)); 3285 c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC | 3286 F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c)); 3287 c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); 3288 c.physeqid_pkd = htobe32(0); 3289 c.fetchszm_to_iqid = 3290 htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) | 3291 V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) | 3292 F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid)); 3293 c.dcaen_to_eqsize = 3294 htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) | 3295 V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 3296 V_FW_EQ_CTRL_CMD_EQSIZE(qsize)); 3297 c.eqaddr = htobe64(eq->ba); 3298 3299 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 3300 if (rc != 0) { 3301 device_printf(sc->dev, 3302 "failed to create control queue %d: %d\n", eq->tx_chan, rc); 3303 return (rc); 3304 } 3305 eq->flags |= EQ_ALLOCATED; 3306 3307 eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid)); 3308 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 3309 if (cntxt_id >= sc->sge.neq) 3310 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 3311 cntxt_id, sc->sge.neq - 1); 3312 sc->sge.eqmap[cntxt_id] = eq; 3313 3314 return (rc); 3315} 3316 3317static int 3318eth_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 3319{ 3320 int rc, cntxt_id; 3321 struct fw_eq_eth_cmd c; 3322 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 3323 3324 bzero(&c, sizeof(c)); 3325 3326 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST | 3327 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) | 3328 V_FW_EQ_ETH_CMD_VFN(0)); 3329 c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC | 3330 F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); 3331 c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUIQE | 3332 F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(vi->viid)); 3333 c.fetchszm_to_iqid = 3334 htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) | 3335 V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO | 
3336 V_FW_EQ_ETH_CMD_IQID(eq->iqid)); 3337 c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) | 3338 V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 3339 V_FW_EQ_ETH_CMD_EQSIZE(qsize)); 3340 c.eqaddr = htobe64(eq->ba); 3341 3342 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 3343 if (rc != 0) { 3344 device_printf(vi->dev, 3345 "failed to create Ethernet egress queue: %d\n", rc); 3346 return (rc); 3347 } 3348 eq->flags |= EQ_ALLOCATED; 3349 3350 eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd)); 3351 eq->abs_id = G_FW_EQ_ETH_CMD_PHYSEQID(be32toh(c.physeqid_pkd)); 3352 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 3353 if (cntxt_id >= sc->sge.neq) 3354 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 3355 cntxt_id, sc->sge.neq - 1); 3356 sc->sge.eqmap[cntxt_id] = eq; 3357 3358 return (rc); 3359} 3360 3361#ifdef TCP_OFFLOAD 3362static int 3363ofld_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 3364{ 3365 int rc, cntxt_id; 3366 struct fw_eq_ofld_cmd c; 3367 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 3368 3369 bzero(&c, sizeof(c)); 3370 3371 c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST | 3372 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) | 3373 V_FW_EQ_OFLD_CMD_VFN(0)); 3374 c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC | 3375 F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c)); 3376 c.fetchszm_to_iqid = 3377 htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) | 3378 V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) | 3379 F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid)); 3380 c.dcaen_to_eqsize = 3381 htobe32(V_FW_EQ_OFLD_CMD_FBMIN(X_FETCHBURSTMIN_64B) | 3382 V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 3383 V_FW_EQ_OFLD_CMD_EQSIZE(qsize)); 3384 c.eqaddr = htobe64(eq->ba); 3385 3386 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 3387 if (rc != 0) { 3388 device_printf(vi->dev, 3389 "failed to create egress queue for TCP offload: %d\n", rc); 3390 return (rc); 3391 } 3392 eq->flags |= EQ_ALLOCATED; 3393 3394 eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd)); 3395 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 3396 if (cntxt_id >= sc->sge.neq) 3397 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 3398 cntxt_id, sc->sge.neq - 1); 3399 sc->sge.eqmap[cntxt_id] = eq; 3400 3401 return (rc); 3402} 3403#endif 3404 3405static int 3406alloc_eq(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 3407{ 3408 int rc, qsize; 3409 size_t len; 3410 3411 mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF); 3412 3413 qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 3414 len = qsize * EQ_ESIZE; 3415 rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map, 3416 &eq->ba, (void **)&eq->desc); 3417 if (rc) 3418 return (rc); 3419 3420 eq->pidx = eq->cidx = 0; 3421 eq->equeqidx = eq->dbidx = 0; 3422 eq->doorbells = sc->doorbells; 3423 3424 switch (eq->flags & EQ_TYPEMASK) { 3425 case EQ_CTRL: 3426 rc = ctrl_eq_alloc(sc, eq); 3427 break; 3428 3429 case EQ_ETH: 3430 rc = eth_eq_alloc(sc, vi, eq); 3431 break; 3432 3433#ifdef TCP_OFFLOAD 3434 case EQ_OFLD: 3435 rc = ofld_eq_alloc(sc, vi, eq); 3436 break; 3437#endif 3438 3439 default: 3440 panic("%s: invalid eq type %d.", __func__, 3441 eq->flags & EQ_TYPEMASK); 3442 } 3443 if (rc != 0) { 3444 device_printf(sc->dev, 3445 "failed to allocate egress queue(%d): %d\n", 3446 eq->flags & EQ_TYPEMASK, rc); 3447 } 3448 3449 if (isset(&eq->doorbells, DOORBELL_UDB) || 3450 isset(&eq->doorbells, DOORBELL_UDBWC) || 3451 isset(&eq->doorbells, 
DOORBELL_WCWR)) { 3452 uint32_t s_qpp = sc->params.sge.eq_s_qpp; 3453 uint32_t mask = (1 << s_qpp) - 1; 3454 volatile uint8_t *udb; 3455 3456 udb = sc->udbs_base + UDBS_DB_OFFSET; 3457 udb += (eq->cntxt_id >> s_qpp) << PAGE_SHIFT; /* pg offset */ 3458 eq->udb_qid = eq->cntxt_id & mask; /* id in page */ 3459 if (eq->udb_qid >= PAGE_SIZE / UDBS_SEG_SIZE) 3460 clrbit(&eq->doorbells, DOORBELL_WCWR); 3461 else { 3462 udb += eq->udb_qid << UDBS_SEG_SHIFT; /* seg offset */ 3463 eq->udb_qid = 0; 3464 } 3465 eq->udb = (volatile void *)udb; 3466 } 3467 3468 return (rc); 3469} 3470 3471static int 3472free_eq(struct adapter *sc, struct sge_eq *eq) 3473{ 3474 int rc; 3475 3476 if (eq->flags & EQ_ALLOCATED) { 3477 switch (eq->flags & EQ_TYPEMASK) { 3478 case EQ_CTRL: 3479 rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0, 3480 eq->cntxt_id); 3481 break; 3482 3483 case EQ_ETH: 3484 rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, 3485 eq->cntxt_id); 3486 break; 3487 3488#ifdef TCP_OFFLOAD 3489 case EQ_OFLD: 3490 rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0, 3491 eq->cntxt_id); 3492 break; 3493#endif 3494 3495 default: 3496 panic("%s: invalid eq type %d.", __func__, 3497 eq->flags & EQ_TYPEMASK); 3498 } 3499 if (rc != 0) { 3500 device_printf(sc->dev, 3501 "failed to free egress queue (%d): %d\n", 3502 eq->flags & EQ_TYPEMASK, rc); 3503 return (rc); 3504 } 3505 eq->flags &= ~EQ_ALLOCATED; 3506 } 3507 3508 free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc); 3509 3510 if (mtx_initialized(&eq->eq_lock)) 3511 mtx_destroy(&eq->eq_lock); 3512 3513 bzero(eq, sizeof(*eq)); 3514 return (0); 3515} 3516 3517static int 3518alloc_wrq(struct adapter *sc, struct vi_info *vi, struct sge_wrq *wrq, 3519 struct sysctl_oid *oid) 3520{ 3521 int rc; 3522 struct sysctl_ctx_list *ctx = vi ? 
&vi->ctx : &sc->ctx; 3523 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 3524 3525 rc = alloc_eq(sc, vi, &wrq->eq); 3526 if (rc) 3527 return (rc); 3528 3529 wrq->adapter = sc; 3530 TASK_INIT(&wrq->wrq_tx_task, 0, wrq_tx_drain, wrq); 3531 TAILQ_INIT(&wrq->incomplete_wrs); 3532 STAILQ_INIT(&wrq->wr_list); 3533 wrq->nwr_pending = 0; 3534 wrq->ndesc_needed = 0; 3535 3536 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 3537 &wrq->eq.cntxt_id, 0, "SGE context id of the queue"); 3538 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", 3539 CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.cidx, 0, sysctl_uint16, "I", 3540 "consumer index"); 3541 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pidx", 3542 CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.pidx, 0, sysctl_uint16, "I", 3543 "producer index"); 3544 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_direct", CTLFLAG_RD, 3545 &wrq->tx_wrs_direct, "# of work requests (direct)"); 3546 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_copied", CTLFLAG_RD, 3547 &wrq->tx_wrs_copied, "# of work requests (copied)"); 3548 3549 return (rc); 3550} 3551 3552static int 3553free_wrq(struct adapter *sc, struct sge_wrq *wrq) 3554{ 3555 int rc; 3556 3557 rc = free_eq(sc, &wrq->eq); 3558 if (rc) 3559 return (rc); 3560 3561 bzero(wrq, sizeof(*wrq)); 3562 return (0); 3563} 3564 3565static int 3566alloc_txq(struct vi_info *vi, struct sge_txq *txq, int idx, 3567 struct sysctl_oid *oid) 3568{ 3569 int rc; 3570 struct port_info *pi = vi->pi; 3571 struct adapter *sc = pi->adapter; 3572 struct sge_eq *eq = &txq->eq; 3573 char name[16]; 3574 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 3575 3576 rc = mp_ring_alloc(&txq->r, eq->sidx, txq, eth_tx, can_resume_eth_tx, 3577 M_CXGBE, M_WAITOK); 3578 if (rc != 0) { 3579 device_printf(sc->dev, "failed to allocate mp_ring: %d\n", rc); 3580 return (rc); 3581 } 3582 3583 rc = alloc_eq(sc, vi, eq); 3584 if (rc != 0) { 3585 mp_ring_free(txq->r); 3586 txq->r = NULL; 3587 return (rc); 3588 } 3589 3590 /* Can't fail after this point. 
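	 * (The remaining allocations below use M_WAITOK and so cannot fail;
	 * everything that could fail has already been checked above.)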
*/ 3591 3592 if (idx == 0) 3593 sc->sge.eq_base = eq->abs_id - eq->cntxt_id; 3594 else 3595 KASSERT(eq->cntxt_id + sc->sge.eq_base == eq->abs_id, 3596 ("eq_base mismatch")); 3597 KASSERT(sc->sge.eq_base == 0 || sc->flags & IS_VF, 3598 ("PF with non-zero eq_base")); 3599 3600 TASK_INIT(&txq->tx_reclaim_task, 0, tx_reclaim, eq); 3601 txq->ifp = vi->ifp; 3602 txq->gl = sglist_alloc(TX_SGL_SEGS, M_WAITOK); 3603 if (sc->flags & IS_VF) 3604 txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) | 3605 V_TXPKT_INTF(pi->tx_chan)); 3606 else 3607 txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | 3608 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_VF_VLD(1) | 3609 V_TXPKT_VF(vi->viid)); 3610 txq->tc_idx = -1; 3611 txq->sdesc = malloc(eq->sidx * sizeof(struct tx_sdesc), M_CXGBE, 3612 M_ZERO | M_WAITOK); 3613 3614 snprintf(name, sizeof(name), "%d", idx); 3615 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, 3616 NULL, "tx queue"); 3617 children = SYSCTL_CHILDREN(oid); 3618 3619 SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "abs_id", CTLFLAG_RD, 3620 &eq->abs_id, 0, "absolute id of the queue"); 3621 SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 3622 &eq->cntxt_id, 0, "SGE context id of the queue"); 3623 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", 3624 CTLTYPE_INT | CTLFLAG_RD, &eq->cidx, 0, sysctl_uint16, "I", 3625 "consumer index"); 3626 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "pidx", 3627 CTLTYPE_INT | CTLFLAG_RD, &eq->pidx, 0, sysctl_uint16, "I", 3628 "producer index"); 3629 3630 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "tc", 3631 CTLTYPE_INT | CTLFLAG_RW, vi, idx, sysctl_tc, "I", 3632 "traffic class (-1 means none)"); 3633 3634 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD, 3635 &txq->txcsum, "# of times hardware assisted with checksum"); 3636 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_insertion", 3637 CTLFLAG_RD, &txq->vlan_insertion, 3638 "# of times hardware inserted 802.1Q tag"); 3639 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD, 3640 &txq->tso_wrs, "# of TSO work requests"); 3641 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD, 3642 &txq->imm_wrs, "# of work requests with immediate data"); 3643 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD, 3644 &txq->sgl_wrs, "# of work requests with direct SGL"); 3645 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD, 3646 &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)"); 3647 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts0_wrs", 3648 CTLFLAG_RD, &txq->txpkts0_wrs, 3649 "# of txpkts (type 0) work requests"); 3650 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts1_wrs", 3651 CTLFLAG_RD, &txq->txpkts1_wrs, 3652 "# of txpkts (type 1) work requests"); 3653 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts0_pkts", 3654 CTLFLAG_RD, &txq->txpkts0_pkts, 3655 "# of frames tx'd using type0 txpkts work requests"); 3656 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts1_pkts", 3657 CTLFLAG_RD, &txq->txpkts1_pkts, 3658 "# of frames tx'd using type1 txpkts work requests"); 3659 3660 SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_enqueues", 3661 CTLFLAG_RD, &txq->r->enqueues, 3662 "# of enqueues to the mp_ring for this queue"); 3663 SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_drops", 3664 CTLFLAG_RD, &txq->r->drops, 3665 "# of drops in the mp_ring for this queue"); 3666 SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_starts", 3667 
CTLFLAG_RD, &txq->r->starts, 3668 "# of normal consumer starts in the mp_ring for this queue"); 3669 SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_stalls", 3670 CTLFLAG_RD, &txq->r->stalls, 3671 "# of consumer stalls in the mp_ring for this queue"); 3672 SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_restarts", 3673 CTLFLAG_RD, &txq->r->restarts, 3674 "# of consumer restarts in the mp_ring for this queue"); 3675 SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_abdications", 3676 CTLFLAG_RD, &txq->r->abdications, 3677 "# of consumer abdications in the mp_ring for this queue"); 3678 3679 return (0); 3680} 3681 3682static int 3683free_txq(struct vi_info *vi, struct sge_txq *txq) 3684{ 3685 int rc; 3686 struct adapter *sc = vi->pi->adapter; 3687 struct sge_eq *eq = &txq->eq; 3688 3689 rc = free_eq(sc, eq); 3690 if (rc) 3691 return (rc); 3692 3693 sglist_free(txq->gl); 3694 free(txq->sdesc, M_CXGBE); 3695 mp_ring_free(txq->r); 3696 3697 bzero(txq, sizeof(*txq)); 3698 return (0); 3699} 3700 3701static void 3702oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error) 3703{ 3704 bus_addr_t *ba = arg; 3705 3706 KASSERT(nseg == 1, 3707 ("%s meant for single segment mappings only.", __func__)); 3708 3709 *ba = error ? 0 : segs->ds_addr; 3710} 3711 3712static inline void 3713ring_fl_db(struct adapter *sc, struct sge_fl *fl) 3714{ 3715 uint32_t n, v; 3716 3717 n = IDXDIFF(fl->pidx / 8, fl->dbidx, fl->sidx); 3718 MPASS(n > 0); 3719 3720 wmb(); 3721 v = fl->dbval | V_PIDX(n); 3722 if (fl->udb) 3723 *fl->udb = htole32(v); 3724 else 3725 t4_write_reg(sc, sc->sge_kdoorbell_reg, v); 3726 IDXINCR(fl->dbidx, n, fl->sidx); 3727} 3728 3729/* 3730 * Fills up the freelist by allocating up to 'n' buffers. Buffers that are 3731 * recycled do not count towards this allocation budget. 3732 * 3733 * Returns non-zero to indicate that this freelist should be added to the list 3734 * of starving freelists. 3735 */ 3736static int 3737refill_fl(struct adapter *sc, struct sge_fl *fl, int n) 3738{ 3739 __be64 *d; 3740 struct fl_sdesc *sd; 3741 uintptr_t pa; 3742 caddr_t cl; 3743 struct cluster_layout *cll; 3744 struct sw_zone_info *swz; 3745 struct cluster_metadata *clm; 3746 uint16_t max_pidx; 3747 uint16_t hw_cidx = fl->hw_cidx; /* stable snapshot */ 3748 3749 FL_LOCK_ASSERT_OWNED(fl); 3750 3751 /* 3752 * We always stop at the beginning of the hardware descriptor that's just 3753 * before the one with the hw cidx. This is to avoid hw pidx = hw cidx, 3754 * which would mean an empty freelist to the chip. 3755 */ 3756 max_pidx = __predict_false(hw_cidx == 0) ? fl->sidx - 1 : hw_cidx - 1; 3757 if (fl->pidx == max_pidx * 8) 3758 return (0); 3759 3760 d = &fl->desc[fl->pidx]; 3761 sd = &fl->sdesc[fl->pidx]; 3762 cll = &fl->cll_def; /* default layout */ 3763 swz = &sc->sge.sw_zone_info[cll->zidx]; 3764 3765 while (n > 0) { 3766 3767 if (sd->cl != NULL) { 3768 3769 if (sd->nmbuf == 0) { 3770 /* 3771 * Fast recycle without involving any atomics on 3772 * the cluster's metadata (if the cluster has 3773 * metadata). This happens when all frames 3774 * received in the cluster were small enough to 3775 * fit within a single mbuf each. 3776 */ 3777 fl->cl_fast_recycled++; 3778#ifdef INVARIANTS 3779 clm = cl_metadata(sc, fl, &sd->cll, sd->cl); 3780 if (clm != NULL) 3781 MPASS(clm->refcount == 1); 3782#endif 3783 goto recycled_fast; 3784 } 3785 3786 /* 3787 * Cluster is guaranteed to have metadata. Clusters 3788 * without metadata always take the fast recycle path 3789 * when they're recycled. 
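		 * Dropping our reference below either lets us recycle the
		 * cluster in place (we held the last reference) or gives it up
		 * to whoever still holds a reference, in which case a fresh
		 * cluster is allocated instead.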
3790 */ 3791 clm = cl_metadata(sc, fl, &sd->cll, sd->cl); 3792 MPASS(clm != NULL); 3793 3794 if (atomic_fetchadd_int(&clm->refcount, -1) == 1) { 3795 fl->cl_recycled++; 3796 counter_u64_add(extfree_rels, 1); 3797 goto recycled; 3798 } 3799 sd->cl = NULL; /* gave up my reference */ 3800 } 3801 MPASS(sd->cl == NULL); 3802alloc: 3803 cl = uma_zalloc(swz->zone, M_NOWAIT); 3804 if (__predict_false(cl == NULL)) { 3805 if (cll == &fl->cll_alt || fl->cll_alt.zidx == -1 || 3806 fl->cll_def.zidx == fl->cll_alt.zidx) 3807 break; 3808 3809 /* fall back to the safe zone */ 3810 cll = &fl->cll_alt; 3811 swz = &sc->sge.sw_zone_info[cll->zidx]; 3812 goto alloc; 3813 } 3814 fl->cl_allocated++; 3815 n--; 3816 3817 pa = pmap_kextract((vm_offset_t)cl); 3818 pa += cll->region1; 3819 sd->cl = cl; 3820 sd->cll = *cll; 3821 *d = htobe64(pa | cll->hwidx); 3822 clm = cl_metadata(sc, fl, cll, cl); 3823 if (clm != NULL) { 3824recycled: 3825#ifdef INVARIANTS 3826 clm->sd = sd; 3827#endif 3828 clm->refcount = 1; 3829 } 3830 sd->nmbuf = 0; 3831recycled_fast: 3832 d++; 3833 sd++; 3834 if (__predict_false(++fl->pidx % 8 == 0)) { 3835 uint16_t pidx = fl->pidx / 8; 3836 3837 if (__predict_false(pidx == fl->sidx)) { 3838 fl->pidx = 0; 3839 pidx = 0; 3840 sd = fl->sdesc; 3841 d = fl->desc; 3842 } 3843 if (pidx == max_pidx) 3844 break; 3845 3846 if (IDXDIFF(pidx, fl->dbidx, fl->sidx) >= 4) 3847 ring_fl_db(sc, fl); 3848 } 3849 } 3850 3851 if (fl->pidx / 8 != fl->dbidx) 3852 ring_fl_db(sc, fl); 3853 3854 return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING)); 3855} 3856 3857/* 3858 * Attempt to refill all starving freelists. 3859 */ 3860static void 3861refill_sfl(void *arg) 3862{ 3863 struct adapter *sc = arg; 3864 struct sge_fl *fl, *fl_temp; 3865 3866 mtx_assert(&sc->sfl_lock, MA_OWNED); 3867 TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) { 3868 FL_LOCK(fl); 3869 refill_fl(sc, fl, 64); 3870 if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) { 3871 TAILQ_REMOVE(&sc->sfl, fl, link); 3872 fl->flags &= ~FL_STARVING; 3873 } 3874 FL_UNLOCK(fl); 3875 } 3876 3877 if (!TAILQ_EMPTY(&sc->sfl)) 3878 callout_schedule(&sc->sfl_callout, hz / 5); 3879} 3880 3881static int 3882alloc_fl_sdesc(struct sge_fl *fl) 3883{ 3884 3885 fl->sdesc = malloc(fl->sidx * 8 * sizeof(struct fl_sdesc), M_CXGBE, 3886 M_ZERO | M_WAITOK); 3887 3888 return (0); 3889} 3890 3891static void 3892free_fl_sdesc(struct adapter *sc, struct sge_fl *fl) 3893{ 3894 struct fl_sdesc *sd; 3895 struct cluster_metadata *clm; 3896 struct cluster_layout *cll; 3897 int i; 3898 3899 sd = fl->sdesc; 3900 for (i = 0; i < fl->sidx * 8; i++, sd++) { 3901 if (sd->cl == NULL) 3902 continue; 3903 3904 cll = &sd->cll; 3905 clm = cl_metadata(sc, fl, cll, sd->cl); 3906 if (sd->nmbuf == 0) 3907 uma_zfree(sc->sge.sw_zone_info[cll->zidx].zone, sd->cl); 3908 else if (clm && atomic_fetchadd_int(&clm->refcount, -1) == 1) { 3909 uma_zfree(sc->sge.sw_zone_info[cll->zidx].zone, sd->cl); 3910 counter_u64_add(extfree_rels, 1); 3911 } 3912 sd->cl = NULL; 3913 } 3914 3915 free(fl->sdesc, M_CXGBE); 3916 fl->sdesc = NULL; 3917} 3918 3919static inline void 3920get_pkt_gl(struct mbuf *m, struct sglist *gl) 3921{ 3922 int rc; 3923 3924 M_ASSERTPKTHDR(m); 3925 3926 sglist_reset(gl); 3927 rc = sglist_append_mbuf(gl, m); 3928 if (__predict_false(rc != 0)) { 3929 panic("%s: mbuf %p (%d segs) was vetted earlier but now fails " 3930 "with %d.", __func__, m, mbuf_nsegs(m), rc); 3931 } 3932 3933 KASSERT(gl->sg_nseg == mbuf_nsegs(m), 3934 ("%s: nsegs changed for mbuf %p from %d to %d", __func__, m, 3935 
mbuf_nsegs(m), gl->sg_nseg)); 3936 KASSERT(gl->sg_nseg > 0 && 3937 gl->sg_nseg <= (needs_tso(m) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS), 3938 ("%s: %d segments, should have been 1 <= nsegs <= %d", __func__, 3939 gl->sg_nseg, needs_tso(m) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS)); 3940} 3941 3942/* 3943 * len16 for a txpkt WR with a GL. Includes the firmware work request header. 3944 */ 3945static inline u_int 3946txpkt_len16(u_int nsegs, u_int tso) 3947{ 3948 u_int n; 3949 3950 MPASS(nsegs > 0); 3951 3952 nsegs--; /* first segment is part of ulptx_sgl */ 3953 n = sizeof(struct fw_eth_tx_pkt_wr) + sizeof(struct cpl_tx_pkt_core) + 3954 sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 3955 if (tso) 3956 n += sizeof(struct cpl_tx_pkt_lso_core); 3957 3958 return (howmany(n, 16)); 3959} 3960 3961/* 3962 * len16 for a txpkt_vm WR with a GL. Includes the firmware work 3963 * request header. 3964 */ 3965static inline u_int 3966txpkt_vm_len16(u_int nsegs, u_int tso) 3967{ 3968 u_int n; 3969 3970 MPASS(nsegs > 0); 3971 3972 nsegs--; /* first segment is part of ulptx_sgl */ 3973 n = sizeof(struct fw_eth_tx_pkt_vm_wr) + 3974 sizeof(struct cpl_tx_pkt_core) + 3975 sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 3976 if (tso) 3977 n += sizeof(struct cpl_tx_pkt_lso_core); 3978 3979 return (howmany(n, 16)); 3980} 3981 3982/* 3983 * len16 for a txpkts type 0 WR with a GL. Does not include the firmware work 3984 * request header. 3985 */ 3986static inline u_int 3987txpkts0_len16(u_int nsegs) 3988{ 3989 u_int n; 3990 3991 MPASS(nsegs > 0); 3992 3993 nsegs--; /* first segment is part of ulptx_sgl */ 3994 n = sizeof(struct ulp_txpkt) + sizeof(struct ulptx_idata) + 3995 sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl) + 3996 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 3997 3998 return (howmany(n, 16)); 3999} 4000 4001/* 4002 * len16 for a txpkts type 1 WR with a GL. Does not include the firmware work 4003 * request header. 4004 */ 4005static inline u_int 4006txpkts1_len16(void) 4007{ 4008 u_int n; 4009 4010 n = sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl); 4011 4012 return (howmany(n, 16)); 4013} 4014 4015static inline u_int 4016imm_payload(u_int ndesc) 4017{ 4018 u_int n; 4019 4020 n = ndesc * EQ_ESIZE - sizeof(struct fw_eth_tx_pkt_wr) - 4021 sizeof(struct cpl_tx_pkt_core); 4022 4023 return (n); 4024} 4025 4026/* 4027 * Write a VM txpkt WR for this packet to the hardware descriptors, update the 4028 * software descriptor, and advance the pidx. It is guaranteed that enough 4029 * descriptors are available. 4030 * 4031 * The return value is the # of hardware descriptors used. 
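 * len16 is the WR length in 16-byte units (precomputed and stored in
 * the mbuf); each 64-byte hardware descriptor holds EQ_ESIZE / 16 = 4
 * such units, so a WR with len16 = 5, for example, uses 2 descriptors.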
4032 */ 4033static u_int 4034write_txpkt_vm_wr(struct sge_txq *txq, struct fw_eth_tx_pkt_vm_wr *wr, 4035 struct mbuf *m0, u_int available) 4036{ 4037 struct sge_eq *eq = &txq->eq; 4038 struct tx_sdesc *txsd; 4039 struct cpl_tx_pkt_core *cpl; 4040 uint32_t ctrl; /* used in many unrelated places */ 4041 uint64_t ctrl1; 4042 int csum_type, len16, ndesc, pktlen, nsegs; 4043 caddr_t dst; 4044 4045 TXQ_LOCK_ASSERT_OWNED(txq); 4046 M_ASSERTPKTHDR(m0); 4047 MPASS(available > 0 && available < eq->sidx); 4048 4049 len16 = mbuf_len16(m0); 4050 nsegs = mbuf_nsegs(m0); 4051 pktlen = m0->m_pkthdr.len; 4052 ctrl = sizeof(struct cpl_tx_pkt_core); 4053 if (needs_tso(m0)) 4054 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 4055 ndesc = howmany(len16, EQ_ESIZE / 16); 4056 MPASS(ndesc <= available); 4057 4058 /* Firmware work request header */ 4059 MPASS(wr == (void *)&eq->desc[eq->pidx]); 4060 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_VM_WR) | 4061 V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); 4062 4063 ctrl = V_FW_WR_LEN16(len16); 4064 wr->equiq_to_len16 = htobe32(ctrl); 4065 wr->r3[0] = 0; 4066 wr->r3[1] = 0; 4067 4068 /* 4069 * Copy over ethmacdst, ethmacsrc, ethtype, and vlantci. 4070 * vlantci is ignored unless the ethtype is 0x8100, so it's 4071 * simpler to always copy it rather than making it 4072 * conditional. Also, it seems that we do not have to set 4073 * vlantci or fake the ethtype when doing VLAN tag insertion. 4074 */ 4075 m_copydata(m0, 0, sizeof(struct ether_header) + 2, wr->ethmacdst); 4076 4077 csum_type = -1; 4078 if (needs_tso(m0)) { 4079 struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1); 4080 4081 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && 4082 m0->m_pkthdr.l4hlen > 0, 4083 ("%s: mbuf %p needs TSO but missing header lengths", 4084 __func__, m0)); 4085 4086 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE | 4087 F_LSO_LAST_SLICE | V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) 4088 | V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2); 4089 if (m0->m_pkthdr.l2hlen == sizeof(struct ether_vlan_header)) 4090 ctrl |= V_LSO_ETHHDR_LEN(1); 4091 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) 4092 ctrl |= F_LSO_IPV6; 4093 4094 lso->lso_ctrl = htobe32(ctrl); 4095 lso->ipid_ofst = htobe16(0); 4096 lso->mss = htobe16(m0->m_pkthdr.tso_segsz); 4097 lso->seqno_offset = htobe32(0); 4098 lso->len = htobe32(pktlen); 4099 4100 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) 4101 csum_type = TX_CSUM_TCPIP6; 4102 else 4103 csum_type = TX_CSUM_TCPIP; 4104 4105 cpl = (void *)(lso + 1); 4106 4107 txq->tso_wrs++; 4108 } else { 4109 if (m0->m_pkthdr.csum_flags & CSUM_IP_TCP) 4110 csum_type = TX_CSUM_TCPIP; 4111 else if (m0->m_pkthdr.csum_flags & CSUM_IP_UDP) 4112 csum_type = TX_CSUM_UDPIP; 4113 else if (m0->m_pkthdr.csum_flags & CSUM_IP6_TCP) 4114 csum_type = TX_CSUM_TCPIP6; 4115 else if (m0->m_pkthdr.csum_flags & CSUM_IP6_UDP) 4116 csum_type = TX_CSUM_UDPIP6; 4117#if defined(INET) 4118 else if (m0->m_pkthdr.csum_flags & CSUM_IP) { 4119 /* 4120 * XXX: The firmware appears to stomp on the 4121 * fragment/flags field of the IP header when 4122 * using TX_CSUM_IP. Fall back to doing 4123 * software checksums. 
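 * The checksum is computed with in_cksum_skip() and written straight
 * into the IP header's ip_sum field inside the mbuf chain; CSUM_IP is
 * then cleared so the hardware is not asked to insert it as well.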
4124 */ 4125 u_short *sump; 4126 struct mbuf *m; 4127 int offset; 4128 4129 m = m0; 4130 offset = 0; 4131 sump = m_advance(&m, &offset, m0->m_pkthdr.l2hlen + 4132 offsetof(struct ip, ip_sum)); 4133 *sump = in_cksum_skip(m0, m0->m_pkthdr.l2hlen + 4134 m0->m_pkthdr.l3hlen, m0->m_pkthdr.l2hlen); 4135 m0->m_pkthdr.csum_flags &= ~CSUM_IP; 4136 } 4137#endif 4138 4139 cpl = (void *)(wr + 1); 4140 } 4141 4142 /* Checksum offload */ 4143 ctrl1 = 0; 4144 if (needs_l3_csum(m0) == 0) 4145 ctrl1 |= F_TXPKT_IPCSUM_DIS; 4146 if (csum_type >= 0) { 4147 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0, 4148 ("%s: mbuf %p needs checksum offload but missing header lengths", 4149 __func__, m0)); 4150 4151 /* XXX: T6 */ 4152 ctrl1 |= V_TXPKT_ETHHDR_LEN(m0->m_pkthdr.l2hlen - 4153 ETHER_HDR_LEN); 4154 ctrl1 |= V_TXPKT_IPHDR_LEN(m0->m_pkthdr.l3hlen); 4155 ctrl1 |= V_TXPKT_CSUM_TYPE(csum_type); 4156 } else 4157 ctrl1 |= F_TXPKT_L4CSUM_DIS; 4158 if (m0->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | 4159 CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) 4160 txq->txcsum++; /* some hardware assistance provided */ 4161 4162 /* VLAN tag insertion */ 4163 if (needs_vlan_insertion(m0)) { 4164 ctrl1 |= F_TXPKT_VLAN_VLD | 4165 V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 4166 txq->vlan_insertion++; 4167 } 4168 4169 /* CPL header */ 4170 cpl->ctrl0 = txq->cpl_ctrl0; 4171 cpl->pack = 0; 4172 cpl->len = htobe16(pktlen); 4173 cpl->ctrl1 = htobe64(ctrl1); 4174 4175 /* SGL */ 4176 dst = (void *)(cpl + 1); 4177 4178 /* 4179 * A packet using TSO will use up an entire descriptor for the 4180 * firmware work request header, LSO CPL, and TX_PKT_XT CPL. 4181 * If this descriptor is the last descriptor in the ring, wrap 4182 * around to the front of the ring explicitly for the start of 4183 * the sgl. 4184 */ 4185 if (dst == (void *)&eq->desc[eq->sidx]) { 4186 dst = (void *)&eq->desc[0]; 4187 write_gl_to_txd(txq, m0, &dst, 0); 4188 } else 4189 write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx); 4190 txq->sgl_wrs++; 4191 4192 txq->txpkt_wrs++; 4193 4194 txsd = &txq->sdesc[eq->pidx]; 4195 txsd->m = m0; 4196 txsd->desc_used = ndesc; 4197 4198 return (ndesc); 4199} 4200 4201/* 4202 * Write a txpkt WR for this packet to the hardware descriptors, update the 4203 * software descriptor, and advance the pidx. It is guaranteed that enough 4204 * descriptors are available. 4205 * 4206 * The return value is the # of hardware descriptors used. 4207 */ 4208static u_int 4209write_txpkt_wr(struct sge_txq *txq, struct fw_eth_tx_pkt_wr *wr, 4210 struct mbuf *m0, u_int available) 4211{ 4212 struct sge_eq *eq = &txq->eq; 4213 struct tx_sdesc *txsd; 4214 struct cpl_tx_pkt_core *cpl; 4215 uint32_t ctrl; /* used in many unrelated places */ 4216 uint64_t ctrl1; 4217 int len16, ndesc, pktlen, nsegs; 4218 caddr_t dst; 4219 4220 TXQ_LOCK_ASSERT_OWNED(txq); 4221 M_ASSERTPKTHDR(m0); 4222 MPASS(available > 0 && available < eq->sidx); 4223 4224 len16 = mbuf_len16(m0); 4225 nsegs = mbuf_nsegs(m0); 4226 pktlen = m0->m_pkthdr.len; 4227 ctrl = sizeof(struct cpl_tx_pkt_core); 4228 if (needs_tso(m0)) 4229 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 4230 else if (pktlen <= imm_payload(2) && available >= 2) { 4231 /* Immediate data. Recalculate len16 and set nsegs to 0. 
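		 * The entire frame is copied into the descriptor ring right
		 * after the CPL instead of being described by an SGL, which
		 * saves the chip a DMA read of the mbuf.  nsegs = 0 selects
		 * the copy path further down.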
*/ 4232 ctrl += pktlen; 4233 len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) + 4234 sizeof(struct cpl_tx_pkt_core) + pktlen, 16); 4235 nsegs = 0; 4236 } 4237 ndesc = howmany(len16, EQ_ESIZE / 16); 4238 MPASS(ndesc <= available); 4239 4240 /* Firmware work request header */ 4241 MPASS(wr == (void *)&eq->desc[eq->pidx]); 4242 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | 4243 V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); 4244 4245 ctrl = V_FW_WR_LEN16(len16); 4246 wr->equiq_to_len16 = htobe32(ctrl); 4247 wr->r3 = 0; 4248 4249 if (needs_tso(m0)) { 4250 struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1); 4251 4252 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && 4253 m0->m_pkthdr.l4hlen > 0, 4254 ("%s: mbuf %p needs TSO but missing header lengths", 4255 __func__, m0)); 4256 4257 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE | 4258 F_LSO_LAST_SLICE | V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) 4259 | V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2); 4260 if (m0->m_pkthdr.l2hlen == sizeof(struct ether_vlan_header)) 4261 ctrl |= V_LSO_ETHHDR_LEN(1); 4262 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) 4263 ctrl |= F_LSO_IPV6; 4264 4265 lso->lso_ctrl = htobe32(ctrl); 4266 lso->ipid_ofst = htobe16(0); 4267 lso->mss = htobe16(m0->m_pkthdr.tso_segsz); 4268 lso->seqno_offset = htobe32(0); 4269 lso->len = htobe32(pktlen); 4270 4271 cpl = (void *)(lso + 1); 4272 4273 txq->tso_wrs++; 4274 } else 4275 cpl = (void *)(wr + 1); 4276 4277 /* Checksum offload */ 4278 ctrl1 = 0; 4279 if (needs_l3_csum(m0) == 0) 4280 ctrl1 |= F_TXPKT_IPCSUM_DIS; 4281 if (needs_l4_csum(m0) == 0) 4282 ctrl1 |= F_TXPKT_L4CSUM_DIS; 4283 if (m0->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | 4284 CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) 4285 txq->txcsum++; /* some hardware assistance provided */ 4286 4287 /* VLAN tag insertion */ 4288 if (needs_vlan_insertion(m0)) { 4289 ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 4290 txq->vlan_insertion++; 4291 } 4292 4293 /* CPL header */ 4294 cpl->ctrl0 = txq->cpl_ctrl0; 4295 cpl->pack = 0; 4296 cpl->len = htobe16(pktlen); 4297 cpl->ctrl1 = htobe64(ctrl1); 4298 4299 /* SGL */ 4300 dst = (void *)(cpl + 1); 4301 if (nsegs > 0) { 4302 4303 write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx); 4304 txq->sgl_wrs++; 4305 } else { 4306 struct mbuf *m; 4307 4308 for (m = m0; m != NULL; m = m->m_next) { 4309 copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len); 4310#ifdef INVARIANTS 4311 pktlen -= m->m_len; 4312#endif 4313 } 4314#ifdef INVARIANTS 4315 KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen)); 4316#endif 4317 txq->imm_wrs++; 4318 } 4319 4320 txq->txpkt_wrs++; 4321 4322 txsd = &txq->sdesc[eq->pidx]; 4323 txsd->m = m0; 4324 txsd->desc_used = ndesc; 4325 4326 return (ndesc); 4327} 4328 4329static int 4330try_txpkts(struct mbuf *m, struct mbuf *n, struct txpkts *txp, u_int available) 4331{ 4332 u_int needed, nsegs1, nsegs2, l1, l2; 4333 4334 if (cannot_use_txpkts(m) || cannot_use_txpkts(n)) 4335 return (1); 4336 4337 nsegs1 = mbuf_nsegs(m); 4338 nsegs2 = mbuf_nsegs(n); 4339 if (nsegs1 + nsegs2 == 2) { 4340 txp->wr_type = 1; 4341 l1 = l2 = txpkts1_len16(); 4342 } else { 4343 txp->wr_type = 0; 4344 l1 = txpkts0_len16(nsegs1); 4345 l2 = txpkts0_len16(nsegs2); 4346 } 4347 txp->len16 = howmany(sizeof(struct fw_eth_tx_pkts_wr), 16) + l1 + l2; 4348 needed = howmany(txp->len16, EQ_ESIZE / 16); 4349 if (needed > SGE_MAX_WR_NDESC || needed > available) 4350 return (1); 4351 4352 txp->plen = m->m_pkthdr.len + n->m_pkthdr.len; 4353 if (txp->plen > 
65535) 4354 return (1); 4355 4356 txp->npkt = 2; 4357 set_mbuf_len16(m, l1); 4358 set_mbuf_len16(n, l2); 4359 4360 return (0); 4361} 4362 4363static int 4364add_to_txpkts(struct mbuf *m, struct txpkts *txp, u_int available) 4365{ 4366 u_int plen, len16, needed, nsegs; 4367 4368 MPASS(txp->wr_type == 0 || txp->wr_type == 1); 4369 4370 nsegs = mbuf_nsegs(m); 4371 if (needs_tso(m) || (txp->wr_type == 1 && nsegs != 1)) 4372 return (1); 4373 4374 plen = txp->plen + m->m_pkthdr.len; 4375 if (plen > 65535) 4376 return (1); 4377 4378 if (txp->wr_type == 0) 4379 len16 = txpkts0_len16(nsegs); 4380 else 4381 len16 = txpkts1_len16(); 4382 needed = howmany(txp->len16 + len16, EQ_ESIZE / 16); 4383 if (needed > SGE_MAX_WR_NDESC || needed > available) 4384 return (1); 4385 4386 txp->npkt++; 4387 txp->plen = plen; 4388 txp->len16 += len16; 4389 set_mbuf_len16(m, len16); 4390 4391 return (0); 4392} 4393 4394/* 4395 * Write a txpkts WR for the packets in txp to the hardware descriptors, update 4396 * the software descriptor, and advance the pidx. It is guaranteed that enough 4397 * descriptors are available. 4398 * 4399 * The return value is the # of hardware descriptors used. 4400 */ 4401static u_int 4402write_txpkts_wr(struct sge_txq *txq, struct fw_eth_tx_pkts_wr *wr, 4403 struct mbuf *m0, const struct txpkts *txp, u_int available) 4404{ 4405 struct sge_eq *eq = &txq->eq; 4406 struct tx_sdesc *txsd; 4407 struct cpl_tx_pkt_core *cpl; 4408 uint32_t ctrl; 4409 uint64_t ctrl1; 4410 int ndesc, checkwrap; 4411 struct mbuf *m; 4412 void *flitp; 4413 4414 TXQ_LOCK_ASSERT_OWNED(txq); 4415 MPASS(txp->npkt > 0); 4416 MPASS(txp->plen < 65536); 4417 MPASS(m0 != NULL); 4418 MPASS(m0->m_nextpkt != NULL); 4419 MPASS(txp->len16 <= howmany(SGE_MAX_WR_LEN, 16)); 4420 MPASS(available > 0 && available < eq->sidx); 4421 4422 ndesc = howmany(txp->len16, EQ_ESIZE / 16); 4423 MPASS(ndesc <= available); 4424 4425 MPASS(wr == (void *)&eq->desc[eq->pidx]); 4426 wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR)); 4427 ctrl = V_FW_WR_LEN16(txp->len16); 4428 wr->equiq_to_len16 = htobe32(ctrl); 4429 wr->plen = htobe16(txp->plen); 4430 wr->npkt = txp->npkt; 4431 wr->r3 = 0; 4432 wr->type = txp->wr_type; 4433 flitp = wr + 1; 4434 4435 /* 4436 * At this point we are 16B into a hardware descriptor. If checkwrap is 4437 * set then we know the WR is going to wrap around somewhere. We'll 4438 * check for that at appropriate points. 
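 * A coalesced WR can span several descriptors, so the CPL headers and
 * SGLs of the member packets may land exactly on the end of the ring;
 * each one is wrapped back to &eq->desc[0] before it is written.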
4439 */ 4440 checkwrap = eq->sidx - ndesc < eq->pidx; 4441 for (m = m0; m != NULL; m = m->m_nextpkt) { 4442 if (txp->wr_type == 0) { 4443 struct ulp_txpkt *ulpmc; 4444 struct ulptx_idata *ulpsc; 4445 4446 /* ULP master command */ 4447 ulpmc = flitp; 4448 ulpmc->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) | 4449 V_ULP_TXPKT_DEST(0) | V_ULP_TXPKT_FID(eq->iqid)); 4450 ulpmc->len = htobe32(mbuf_len16(m)); 4451 4452 /* ULP subcommand */ 4453 ulpsc = (void *)(ulpmc + 1); 4454 ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) | 4455 F_ULP_TX_SC_MORE); 4456 ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core)); 4457 4458 cpl = (void *)(ulpsc + 1); 4459 if (checkwrap && 4460 (uintptr_t)cpl == (uintptr_t)&eq->desc[eq->sidx]) 4461 cpl = (void *)&eq->desc[0]; 4462 txq->txpkts0_pkts += txp->npkt; 4463 txq->txpkts0_wrs++; 4464 } else { 4465 cpl = flitp; 4466 txq->txpkts1_pkts += txp->npkt; 4467 txq->txpkts1_wrs++; 4468 } 4469 4470 /* Checksum offload */ 4471 ctrl1 = 0; 4472 if (needs_l3_csum(m) == 0) 4473 ctrl1 |= F_TXPKT_IPCSUM_DIS; 4474 if (needs_l4_csum(m) == 0) 4475 ctrl1 |= F_TXPKT_L4CSUM_DIS; 4476 if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | 4477 CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) 4478 txq->txcsum++; /* some hardware assistance provided */ 4479 4480 /* VLAN tag insertion */ 4481 if (needs_vlan_insertion(m)) { 4482 ctrl1 |= F_TXPKT_VLAN_VLD | 4483 V_TXPKT_VLAN(m->m_pkthdr.ether_vtag); 4484 txq->vlan_insertion++; 4485 } 4486 4487 /* CPL header */ 4488 cpl->ctrl0 = txq->cpl_ctrl0; 4489 cpl->pack = 0; 4490 cpl->len = htobe16(m->m_pkthdr.len); 4491 cpl->ctrl1 = htobe64(ctrl1); 4492 4493 flitp = cpl + 1; 4494 if (checkwrap && 4495 (uintptr_t)flitp == (uintptr_t)&eq->desc[eq->sidx]) 4496 flitp = (void *)&eq->desc[0]; 4497 4498 write_gl_to_txd(txq, m, (caddr_t *)(&flitp), checkwrap); 4499 4500 } 4501 4502 txsd = &txq->sdesc[eq->pidx]; 4503 txsd->m = m0; 4504 txsd->desc_used = ndesc; 4505 4506 return (ndesc); 4507} 4508 4509/* 4510 * If the SGL ends on an address that is not 16 byte aligned, this function will 4511 * add a 0 filled flit at the end. 4512 */ 4513static void 4514write_gl_to_txd(struct sge_txq *txq, struct mbuf *m, caddr_t *to, int checkwrap) 4515{ 4516 struct sge_eq *eq = &txq->eq; 4517 struct sglist *gl = txq->gl; 4518 struct sglist_seg *seg; 4519 __be64 *flitp, *wrap; 4520 struct ulptx_sgl *usgl; 4521 int i, nflits, nsegs; 4522 4523 KASSERT(((uintptr_t)(*to) & 0xf) == 0, 4524 ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to)); 4525 MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]); 4526 MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]); 4527 4528 get_pkt_gl(m, gl); 4529 nsegs = gl->sg_nseg; 4530 MPASS(nsegs > 0); 4531 4532 nflits = (3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1) + 2; 4533 flitp = (__be64 *)(*to); 4534 wrap = (__be64 *)(&eq->desc[eq->sidx]); 4535 seg = &gl->sg_segs[0]; 4536 usgl = (void *)flitp; 4537 4538 /* 4539 * We start at a 16 byte boundary somewhere inside the tx descriptor 4540 * ring, so we're at least 16 bytes away from the status page. There is 4541 * no chance of a wrap around in the middle of usgl (which is 16 bytes). 
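 * Layout: the first two flits hold the SGL command/nsge, len0, and
 * addr0; every following pair of segments takes 3 flits (one flit with
 * both lengths, two with the addresses), and a leftover odd segment
 * takes 2.  That is the nflits computed above, and the trailing zero
 * flit (when nflits is odd) keeps the next write 16 byte aligned.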
4542 */ 4543 4544 usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | 4545 V_ULPTX_NSGE(nsegs)); 4546 usgl->len0 = htobe32(seg->ss_len); 4547 usgl->addr0 = htobe64(seg->ss_paddr); 4548 seg++; 4549 4550 if (checkwrap == 0 || (uintptr_t)(flitp + nflits) <= (uintptr_t)wrap) { 4551 4552 /* Won't wrap around at all */ 4553 4554 for (i = 0; i < nsegs - 1; i++, seg++) { 4555 usgl->sge[i / 2].len[i & 1] = htobe32(seg->ss_len); 4556 usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ss_paddr); 4557 } 4558 if (i & 1) 4559 usgl->sge[i / 2].len[1] = htobe32(0); 4560 flitp += nflits; 4561 } else { 4562 4563 /* Will wrap somewhere in the rest of the SGL */ 4564 4565 /* 2 flits already written, write the rest flit by flit */ 4566 flitp = (void *)(usgl + 1); 4567 for (i = 0; i < nflits - 2; i++) { 4568 if (flitp == wrap) 4569 flitp = (void *)eq->desc; 4570 *flitp++ = get_flit(seg, nsegs - 1, i); 4571 } 4572 } 4573 4574 if (nflits & 1) { 4575 MPASS(((uintptr_t)flitp) & 0xf); 4576 *flitp++ = 0; 4577 } 4578 4579 MPASS((((uintptr_t)flitp) & 0xf) == 0); 4580 if (__predict_false(flitp == wrap)) 4581 *to = (void *)eq->desc; 4582 else 4583 *to = (void *)flitp; 4584} 4585 4586static inline void 4587copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len) 4588{ 4589 4590 MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]); 4591 MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]); 4592 4593 if (__predict_true((uintptr_t)(*to) + len <= 4594 (uintptr_t)&eq->desc[eq->sidx])) { 4595 bcopy(from, *to, len); 4596 (*to) += len; 4597 } else { 4598 int portion = (uintptr_t)&eq->desc[eq->sidx] - (uintptr_t)(*to); 4599 4600 bcopy(from, *to, portion); 4601 from += portion; 4602 portion = len - portion; /* remaining */ 4603 bcopy(from, (void *)eq->desc, portion); 4604 (*to) = (caddr_t)eq->desc + portion; 4605 } 4606} 4607 4608static inline void 4609ring_eq_db(struct adapter *sc, struct sge_eq *eq, u_int n) 4610{ 4611 u_int db; 4612 4613 MPASS(n > 0); 4614 4615 db = eq->doorbells; 4616 if (n > 1) 4617 clrbit(&db, DOORBELL_WCWR); 4618 wmb(); 4619 4620 switch (ffs(db) - 1) { 4621 case DOORBELL_UDB: 4622 *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n)); 4623 break; 4624 4625 case DOORBELL_WCWR: { 4626 volatile uint64_t *dst, *src; 4627 int i; 4628 4629 /* 4630 * Queues whose 128B doorbell segment fits in the page do not 4631 * use relative qid (udb_qid is always 0). Only queues with 4632 * doorbell segments can do WCWR. 
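 * A WCWR copies the whole 64-byte descriptor into the write-combined
 * doorbell window, so the chip receives the work request along with
 * the doorbell and does not have to fetch it from the ring; that is
 * also why it is limited to a single descriptor (n == 1).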
4633 */ 4634 KASSERT(eq->udb_qid == 0 && n == 1, 4635 ("%s: inappropriate doorbell (0x%x, %d, %d) for eq %p", 4636 __func__, eq->doorbells, n, eq->dbidx, eq)); 4637 4638 dst = (volatile void *)((uintptr_t)eq->udb + UDBS_WR_OFFSET - 4639 UDBS_DB_OFFSET); 4640 i = eq->dbidx; 4641 src = (void *)&eq->desc[i]; 4642 while (src != (void *)&eq->desc[i + 1]) 4643 *dst++ = *src++; 4644 wmb(); 4645 break; 4646 } 4647 4648 case DOORBELL_UDBWC: 4649 *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n)); 4650 wmb(); 4651 break; 4652 4653 case DOORBELL_KDB: 4654 t4_write_reg(sc, sc->sge_kdoorbell_reg, 4655 V_QID(eq->cntxt_id) | V_PIDX(n)); 4656 break; 4657 } 4658 4659 IDXINCR(eq->dbidx, n, eq->sidx); 4660} 4661 4662static inline u_int 4663reclaimable_tx_desc(struct sge_eq *eq) 4664{ 4665 uint16_t hw_cidx; 4666 4667 hw_cidx = read_hw_cidx(eq); 4668 return (IDXDIFF(hw_cidx, eq->cidx, eq->sidx)); 4669} 4670 4671static inline u_int 4672total_available_tx_desc(struct sge_eq *eq) 4673{ 4674 uint16_t hw_cidx, pidx; 4675 4676 hw_cidx = read_hw_cidx(eq); 4677 pidx = eq->pidx; 4678 4679 if (pidx == hw_cidx) 4680 return (eq->sidx - 1); 4681 else 4682 return (IDXDIFF(hw_cidx, pidx, eq->sidx) - 1); 4683} 4684 4685static inline uint16_t 4686read_hw_cidx(struct sge_eq *eq) 4687{ 4688 struct sge_qstat *spg = (void *)&eq->desc[eq->sidx]; 4689 uint16_t cidx = spg->cidx; /* stable snapshot */ 4690 4691 return (be16toh(cidx)); 4692} 4693 4694/* 4695 * Reclaim 'n' descriptors approximately. 4696 */ 4697static u_int 4698reclaim_tx_descs(struct sge_txq *txq, u_int n) 4699{ 4700 struct tx_sdesc *txsd; 4701 struct sge_eq *eq = &txq->eq; 4702 u_int can_reclaim, reclaimed; 4703 4704 TXQ_LOCK_ASSERT_OWNED(txq); 4705 MPASS(n > 0); 4706 4707 reclaimed = 0; 4708 can_reclaim = reclaimable_tx_desc(eq); 4709 while (can_reclaim && reclaimed < n) { 4710 int ndesc; 4711 struct mbuf *m, *nextpkt; 4712 4713 txsd = &txq->sdesc[eq->cidx]; 4714 ndesc = txsd->desc_used; 4715 4716 /* Firmware doesn't return "partial" credits. 
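		 * Credits always come back in whole-WR units, so the hardware
		 * cidx can never stop in the middle of the descriptors
		 * recorded in txsd->desc_used.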
*/ 4717 KASSERT(can_reclaim >= ndesc, 4718 ("%s: unexpected number of credits: %d, %d", 4719 __func__, can_reclaim, ndesc)); 4720 4721 for (m = txsd->m; m != NULL; m = nextpkt) { 4722 nextpkt = m->m_nextpkt; 4723 m->m_nextpkt = NULL; 4724 m_freem(m); 4725 } 4726 reclaimed += ndesc; 4727 can_reclaim -= ndesc; 4728 IDXINCR(eq->cidx, ndesc, eq->sidx); 4729 } 4730 4731 return (reclaimed); 4732} 4733 4734static void 4735tx_reclaim(void *arg, int n) 4736{ 4737 struct sge_txq *txq = arg; 4738 struct sge_eq *eq = &txq->eq; 4739 4740 do { 4741 if (TXQ_TRYLOCK(txq) == 0) 4742 break; 4743 n = reclaim_tx_descs(txq, 32); 4744 if (eq->cidx == eq->pidx) 4745 eq->equeqidx = eq->pidx; 4746 TXQ_UNLOCK(txq); 4747 } while (n > 0); 4748} 4749 4750static __be64 4751get_flit(struct sglist_seg *segs, int nsegs, int idx) 4752{ 4753 int i = (idx / 3) * 2; 4754 4755 switch (idx % 3) { 4756 case 0: { 4757 __be64 rc; 4758 4759 rc = htobe32(segs[i].ss_len); 4760 if (i + 1 < nsegs) 4761 rc |= (uint64_t)htobe32(segs[i + 1].ss_len) << 32; 4762 4763 return (rc); 4764 } 4765 case 1: 4766 return (htobe64(segs[i].ss_paddr)); 4767 case 2: 4768 return (htobe64(segs[i + 1].ss_paddr)); 4769 } 4770 4771 return (0); 4772} 4773 4774static void 4775find_best_refill_source(struct adapter *sc, struct sge_fl *fl, int maxp) 4776{ 4777 int8_t zidx, hwidx, idx; 4778 uint16_t region1, region3; 4779 int spare, spare_needed, n; 4780 struct sw_zone_info *swz; 4781 struct hw_buf_info *hwb, *hwb_list = &sc->sge.hw_buf_info[0]; 4782 4783 /* 4784 * Buffer Packing: Look for PAGE_SIZE or larger zone which has a bufsize 4785 * large enough for the max payload and cluster metadata. Otherwise 4786 * settle for the largest bufsize that leaves enough room in the cluster 4787 * for metadata. 4788 * 4789 * Without buffer packing: Look for the smallest zone which has a 4790 * bufsize large enough for the max payload. Settle for the largest 4791 * bufsize available if there's nothing big enough for max payload. 4792 */ 4793 spare_needed = fl->flags & FL_BUF_PACKING ? CL_METADATA_SIZE : 0; 4794 swz = &sc->sge.sw_zone_info[0]; 4795 hwidx = -1; 4796 for (zidx = 0; zidx < SW_ZONE_SIZES; zidx++, swz++) { 4797 if (swz->size > largest_rx_cluster) { 4798 if (__predict_true(hwidx != -1)) 4799 break; 4800 4801 /* 4802 * This is a misconfiguration. largest_rx_cluster is 4803 * preventing us from finding a refill source. See 4804 * dev.t5nex.<n>.buffer_sizes to figure out why. 4805 */ 4806 device_printf(sc->dev, "largest_rx_cluster=%u leaves no" 4807 " refill source for fl %p (dma %u). Ignored.\n", 4808 largest_rx_cluster, fl, maxp); 4809 } 4810 for (idx = swz->head_hwidx; idx != -1; idx = hwb->next) { 4811 hwb = &hwb_list[idx]; 4812 spare = swz->size - hwb->size; 4813 if (spare < spare_needed) 4814 continue; 4815 4816 hwidx = idx; /* best option so far */ 4817 if (hwb->size >= maxp) { 4818 4819 if ((fl->flags & FL_BUF_PACKING) == 0) 4820 goto done; /* stop looking (not packing) */ 4821 4822 if (swz->size >= safest_rx_cluster) 4823 goto done; /* stop looking (packing) */ 4824 } 4825 break; /* keep looking, next zone */ 4826 } 4827 } 4828done: 4829 /* A usable hwidx has been located. */ 4830 MPASS(hwidx != -1); 4831 hwb = &hwb_list[hwidx]; 4832 zidx = hwb->zidx; 4833 swz = &sc->sge.sw_zone_info[zidx]; 4834 region1 = 0; 4835 region3 = swz->size - hwb->size; 4836 4837 /* 4838 * Stay within this zone and see if there is a better match when mbuf 4839 * inlining is allowed. 
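(Inlining carves region1 out of the front of the cluster for mbuf structures, so small received frames do not need a separate mbuf allocation.)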
Remember that the hwidx's are sorted in 4840 * decreasing order of size (so in increasing order of spare area). 4841 */ 4842 for (idx = hwidx; idx != -1; idx = hwb->next) { 4843 hwb = &hwb_list[idx]; 4844 spare = swz->size - hwb->size; 4845 4846 if (allow_mbufs_in_cluster == 0 || hwb->size < maxp) 4847 break; 4848 4849 /* 4850 * Do not inline mbufs if doing so would violate the pad/pack 4851 * boundary alignment requirement. 4852 */ 4853 if (fl_pad && (MSIZE % sc->params.sge.pad_boundary) != 0) 4854 continue; 4855 if (fl->flags & FL_BUF_PACKING && 4856 (MSIZE % sc->params.sge.pack_boundary) != 0) 4857 continue; 4858 4859 if (spare < CL_METADATA_SIZE + MSIZE) 4860 continue; 4861 n = (spare - CL_METADATA_SIZE) / MSIZE; 4862 if (n > howmany(hwb->size, maxp)) 4863 break; 4864 4865 hwidx = idx; 4866 if (fl->flags & FL_BUF_PACKING) { 4867 region1 = n * MSIZE; 4868 region3 = spare - region1; 4869 } else { 4870 region1 = MSIZE; 4871 region3 = spare - region1; 4872 break; 4873 } 4874 } 4875 4876 KASSERT(zidx >= 0 && zidx < SW_ZONE_SIZES, 4877 ("%s: bad zone %d for fl %p, maxp %d", __func__, zidx, fl, maxp)); 4878 KASSERT(hwidx >= 0 && hwidx <= SGE_FLBUF_SIZES, 4879 ("%s: bad hwidx %d for fl %p, maxp %d", __func__, hwidx, fl, maxp)); 4880 KASSERT(region1 + sc->sge.hw_buf_info[hwidx].size + region3 == 4881 sc->sge.sw_zone_info[zidx].size, 4882 ("%s: bad buffer layout for fl %p, maxp %d. " 4883 "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp, 4884 sc->sge.sw_zone_info[zidx].size, region1, 4885 sc->sge.hw_buf_info[hwidx].size, region3)); 4886 if (fl->flags & FL_BUF_PACKING || region1 > 0) { 4887 KASSERT(region3 >= CL_METADATA_SIZE, 4888 ("%s: no room for metadata. fl %p, maxp %d; " 4889 "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp, 4890 sc->sge.sw_zone_info[zidx].size, region1, 4891 sc->sge.hw_buf_info[hwidx].size, region3)); 4892 KASSERT(region1 % MSIZE == 0, 4893 ("%s: bad mbuf region for fl %p, maxp %d. 
" 4894 "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp, 4895 sc->sge.sw_zone_info[zidx].size, region1, 4896 sc->sge.hw_buf_info[hwidx].size, region3)); 4897 } 4898 4899 fl->cll_def.zidx = zidx; 4900 fl->cll_def.hwidx = hwidx; 4901 fl->cll_def.region1 = region1; 4902 fl->cll_def.region3 = region3; 4903} 4904 4905static void 4906find_safe_refill_source(struct adapter *sc, struct sge_fl *fl) 4907{ 4908 struct sge *s = &sc->sge; 4909 struct hw_buf_info *hwb; 4910 struct sw_zone_info *swz; 4911 int spare; 4912 int8_t hwidx; 4913 4914 if (fl->flags & FL_BUF_PACKING) 4915 hwidx = s->safe_hwidx2; /* with room for metadata */ 4916 else if (allow_mbufs_in_cluster && s->safe_hwidx2 != -1) { 4917 hwidx = s->safe_hwidx2; 4918 hwb = &s->hw_buf_info[hwidx]; 4919 swz = &s->sw_zone_info[hwb->zidx]; 4920 spare = swz->size - hwb->size; 4921 4922 /* no good if there isn't room for an mbuf as well */ 4923 if (spare < CL_METADATA_SIZE + MSIZE) 4924 hwidx = s->safe_hwidx1; 4925 } else 4926 hwidx = s->safe_hwidx1; 4927 4928 if (hwidx == -1) { 4929 /* No fallback source */ 4930 fl->cll_alt.hwidx = -1; 4931 fl->cll_alt.zidx = -1; 4932 4933 return; 4934 } 4935 4936 hwb = &s->hw_buf_info[hwidx]; 4937 swz = &s->sw_zone_info[hwb->zidx]; 4938 spare = swz->size - hwb->size; 4939 fl->cll_alt.hwidx = hwidx; 4940 fl->cll_alt.zidx = hwb->zidx; 4941 if (allow_mbufs_in_cluster && 4942 (fl_pad == 0 || (MSIZE % sc->params.sge.pad_boundary) == 0)) 4943 fl->cll_alt.region1 = ((spare - CL_METADATA_SIZE) / MSIZE) * MSIZE; 4944 else 4945 fl->cll_alt.region1 = 0; 4946 fl->cll_alt.region3 = spare - fl->cll_alt.region1; 4947} 4948 4949static void 4950add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl) 4951{ 4952 mtx_lock(&sc->sfl_lock); 4953 FL_LOCK(fl); 4954 if ((fl->flags & FL_DOOMED) == 0) { 4955 fl->flags |= FL_STARVING; 4956 TAILQ_INSERT_TAIL(&sc->sfl, fl, link); 4957 callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc); 4958 } 4959 FL_UNLOCK(fl); 4960 mtx_unlock(&sc->sfl_lock); 4961} 4962 4963static void 4964handle_wrq_egr_update(struct adapter *sc, struct sge_eq *eq) 4965{ 4966 struct sge_wrq *wrq = (void *)eq; 4967 4968 atomic_readandclear_int(&eq->equiq); 4969 taskqueue_enqueue(sc->tq[eq->tx_chan], &wrq->wrq_tx_task); 4970} 4971 4972static void 4973handle_eth_egr_update(struct adapter *sc, struct sge_eq *eq) 4974{ 4975 struct sge_txq *txq = (void *)eq; 4976 4977 MPASS((eq->flags & EQ_TYPEMASK) == EQ_ETH); 4978 4979 atomic_readandclear_int(&eq->equiq); 4980 mp_ring_check_drainage(txq->r, 0); 4981 taskqueue_enqueue(sc->tq[eq->tx_chan], &txq->tx_reclaim_task); 4982} 4983 4984static int 4985handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss, 4986 struct mbuf *m) 4987{ 4988 const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1); 4989 unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid)); 4990 struct adapter *sc = iq->adapter; 4991 struct sge *s = &sc->sge; 4992 struct sge_eq *eq; 4993 static void (*h[])(struct adapter *, struct sge_eq *) = {NULL, 4994 &handle_wrq_egr_update, &handle_eth_egr_update, 4995 &handle_wrq_egr_update}; 4996 4997 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, 4998 rss->opcode)); 4999 5000 eq = s->eqmap[qid - s->eq_start - s->eq_base]; 5001 (*h[eq->flags & EQ_TYPEMASK])(sc, eq); 5002 5003 return (0); 5004} 5005 5006/* handle_fw_msg works for both fw4_msg and fw6_msg because this is valid */ 5007CTASSERT(offsetof(struct cpl_fw4_msg, data) == \ 5008 offsetof(struct cpl_fw6_msg, data)); 5009 5010static int 5011handle_fw_msg(struct sge_iq *iq, const struct 
rss_header *rss, struct mbuf *m) 5012{ 5013 struct adapter *sc = iq->adapter; 5014 const struct cpl_fw6_msg *cpl = (const void *)(rss + 1); 5015 5016 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, 5017 rss->opcode)); 5018 5019 if (cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL) { 5020 const struct rss_header *rss2; 5021 5022 rss2 = (const struct rss_header *)&cpl->data[0]; 5023 return (t4_cpl_handler[rss2->opcode](iq, rss2, m)); 5024 } 5025 5026 return (t4_fw_msg_handler[cpl->type](sc, &cpl->data[0])); 5027} 5028 5029/** 5030 * t4_handle_wrerr_rpl - process a FW work request error message 5031 * @adap: the adapter 5032 * @rpl: start of the FW message 5033 */ 5034static int 5035t4_handle_wrerr_rpl(struct adapter *adap, const __be64 *rpl) 5036{ 5037 u8 opcode = *(const u8 *)rpl; 5038 const struct fw_error_cmd *e = (const void *)rpl; 5039 unsigned int i; 5040 5041 if (opcode != FW_ERROR_CMD) { 5042 log(LOG_ERR, 5043 "%s: Received WRERR_RPL message with opcode %#x\n", 5044 device_get_nameunit(adap->dev), opcode); 5045 return (EINVAL); 5046 } 5047 log(LOG_ERR, "%s: FW_ERROR (%s) ", device_get_nameunit(adap->dev), 5048 G_FW_ERROR_CMD_FATAL(be32toh(e->op_to_type)) ? "fatal" : 5049 "non-fatal"); 5050 switch (G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type))) { 5051 case FW_ERROR_TYPE_EXCEPTION: 5052 log(LOG_ERR, "exception info:\n"); 5053 for (i = 0; i < nitems(e->u.exception.info); i++) 5054 log(LOG_ERR, "%s%08x", i == 0 ? "\t" : " ", 5055 be32toh(e->u.exception.info[i])); 5056 log(LOG_ERR, "\n"); 5057 break; 5058 case FW_ERROR_TYPE_HWMODULE: 5059 log(LOG_ERR, "HW module regaddr %08x regval %08x\n", 5060 be32toh(e->u.hwmodule.regaddr), 5061 be32toh(e->u.hwmodule.regval)); 5062 break; 5063 case FW_ERROR_TYPE_WR: 5064 log(LOG_ERR, "WR cidx %d PF %d VF %d eqid %d hdr:\n", 5065 be16toh(e->u.wr.cidx), 5066 G_FW_ERROR_CMD_PFN(be16toh(e->u.wr.pfn_vfn)), 5067 G_FW_ERROR_CMD_VFN(be16toh(e->u.wr.pfn_vfn)), 5068 be32toh(e->u.wr.eqid)); 5069 for (i = 0; i < nitems(e->u.wr.wrhdr); i++) 5070 log(LOG_ERR, "%s%02x", i == 0 ? "\t" : " ", 5071 e->u.wr.wrhdr[i]); 5072 log(LOG_ERR, "\n"); 5073 break; 5074 case FW_ERROR_TYPE_ACL: 5075 log(LOG_ERR, "ACL cidx %d PF %d VF %d eqid %d %s", 5076 be16toh(e->u.acl.cidx), 5077 G_FW_ERROR_CMD_PFN(be16toh(e->u.acl.pfn_vfn)), 5078 G_FW_ERROR_CMD_VFN(be16toh(e->u.acl.pfn_vfn)), 5079 be32toh(e->u.acl.eqid), 5080 G_FW_ERROR_CMD_MV(be16toh(e->u.acl.mv_pkd)) ? 
"vlanid" : 5081 "MAC"); 5082 for (i = 0; i < nitems(e->u.acl.val); i++) 5083 log(LOG_ERR, " %02x", e->u.acl.val[i]); 5084 log(LOG_ERR, "\n"); 5085 break; 5086 default: 5087 log(LOG_ERR, "type %#x\n", 5088 G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type))); 5089 return (EINVAL); 5090 } 5091 return (0); 5092} 5093 5094static int 5095sysctl_uint16(SYSCTL_HANDLER_ARGS) 5096{ 5097 uint16_t *id = arg1; 5098 int i = *id; 5099 5100 return sysctl_handle_int(oidp, &i, 0, req); 5101} 5102 5103static int 5104sysctl_bufsizes(SYSCTL_HANDLER_ARGS) 5105{ 5106 struct sge *s = arg1; 5107 struct hw_buf_info *hwb = &s->hw_buf_info[0]; 5108 struct sw_zone_info *swz = &s->sw_zone_info[0]; 5109 int i, rc; 5110 struct sbuf sb; 5111 char c; 5112 5113 sbuf_new(&sb, NULL, 32, SBUF_AUTOEXTEND); 5114 for (i = 0; i < SGE_FLBUF_SIZES; i++, hwb++) { 5115 if (hwb->zidx >= 0 && swz[hwb->zidx].size <= largest_rx_cluster) 5116 c = '*'; 5117 else 5118 c = '\0'; 5119 5120 sbuf_printf(&sb, "%u%c ", hwb->size, c); 5121 } 5122 sbuf_trim(&sb); 5123 sbuf_finish(&sb); 5124 rc = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 5125 sbuf_delete(&sb); 5126 return (rc); 5127} 5128 5129static int 5130sysctl_tc(SYSCTL_HANDLER_ARGS) 5131{ 5132 struct vi_info *vi = arg1; 5133 struct port_info *pi; 5134 struct adapter *sc; 5135 struct sge_txq *txq; 5136 struct tx_sched_class *tc; 5137 int qidx = arg2, rc, tc_idx; 5138 uint32_t fw_queue, fw_class; 5139 5140 MPASS(qidx >= 0 && qidx < vi->ntxq); 5141 pi = vi->pi; 5142 sc = pi->adapter; 5143 txq = &sc->sge.txq[vi->first_txq + qidx]; 5144 5145 tc_idx = txq->tc_idx; 5146 rc = sysctl_handle_int(oidp, &tc_idx, 0, req); 5147 if (rc != 0 || req->newptr == NULL) 5148 return (rc); 5149 5150 /* Note that -1 is legitimate input (it means unbind). */ 5151 if (tc_idx < -1 || tc_idx >= sc->chip_params->nsched_cls) 5152 return (EINVAL); 5153 5154 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4stc"); 5155 if (rc) 5156 return (rc); 5157 5158 if (tc_idx == txq->tc_idx) { 5159 rc = 0; /* No change, nothing to do. */ 5160 goto done; 5161 } 5162 5163 fw_queue = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | 5164 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH) | 5165 V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id); 5166 5167 if (tc_idx == -1) 5168 fw_class = 0xffffffff; /* Unbind. */ 5169 else { 5170 /* 5171 * Bind to a different class. Ethernet txq's are only allowed 5172 * to bind to cl-rl mode-class for now. XXX: too restrictive. 5173 */ 5174 tc = &pi->tc[tc_idx]; 5175 if (tc->flags & TX_SC_OK && 5176 tc->params.level == SCHED_CLASS_LEVEL_CL_RL && 5177 tc->params.mode == SCHED_CLASS_MODE_CLASS) { 5178 /* Ok to proceed. */ 5179 fw_class = tc_idx; 5180 } else { 5181 rc = tc->flags & TX_SC_OK ? EBUSY : ENXIO; 5182 goto done; 5183 } 5184 } 5185 5186 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue, &fw_class); 5187 if (rc == 0) { 5188 if (txq->tc_idx != -1) { 5189 tc = &pi->tc[txq->tc_idx]; 5190 MPASS(tc->refcount > 0); 5191 tc->refcount--; 5192 } 5193 if (tc_idx != -1) { 5194 tc = &pi->tc[tc_idx]; 5195 tc->refcount++; 5196 } 5197 txq->tc_idx = tc_idx; 5198 } 5199done: 5200 end_synchronized_op(sc, 0); 5201 return (rc); 5202} 5203