sfxge_rx.c revision 282940
/*-
 * Copyright (c) 2010-2011 Solarflare Communications, Inc.
 * All rights reserved.
 *
 * This software was developed in part by Philip Paeps under contract for
 * Solarflare Communications, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/sfxge/sfxge_rx.c 282940 2015-05-15 06:48:36Z arybchik $");

#include <sys/types.h>
#include <sys/mbuf.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/limits.h>
#include <sys/syslog.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>

#include <machine/in_cksum.h>

#include "common/efx.h"


#include "sfxge.h"
#include "sfxge_rx.h"

#define	RX_REFILL_THRESHOLD(_entries)	(EFX_RXQ_LIMIT(_entries) * 9 / 10)

#ifdef SFXGE_LRO

SYSCTL_NODE(_hw_sfxge, OID_AUTO, lro, CTLFLAG_RD, NULL,
	    "Large receive offload (LRO) parameters");

#define	SFXGE_LRO_PARAM(_param)	SFXGE_PARAM(lro._param)

/* Size of the LRO hash table.  Must be a power of 2.  A larger table
 * means we can accelerate a larger number of streams.
 */
static unsigned lro_table_size = 128;
TUNABLE_INT(SFXGE_LRO_PARAM(table_size), &lro_table_size);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, table_size, CTLFLAG_RDTUN,
	    &lro_table_size, 0,
	    "Size of the LRO hash table (must be a power of 2)");

/* Maximum length of a hash chain.  If chains get too long then the lookup
 * time increases and may exceed the benefit of LRO.
 */
static unsigned lro_chain_max = 20;
TUNABLE_INT(SFXGE_LRO_PARAM(chain_max), &lro_chain_max);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, chain_max, CTLFLAG_RDTUN,
	    &lro_chain_max, 0,
	    "The maximum length of a hash chain");
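/* Example (hypothetical) loader.conf overrides for these tunables, assuming
 * the usual "hw.sfxge" prefix supplied by SFXGE_PARAM():
 *
 *	hw.sfxge.lro.table_size="256"
 *	hw.sfxge.lro.chain_max="40"
 *
 * The values are illustrative only; table_size must remain a power of 2.
 */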
/* Maximum time (in ticks) that a connection can be idle before its LRO
 * state is discarded.
 */
static unsigned lro_idle_ticks; /* initialised in sfxge_rx_init() */
TUNABLE_INT(SFXGE_LRO_PARAM(idle_ticks), &lro_idle_ticks);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, idle_ticks, CTLFLAG_RDTUN,
	    &lro_idle_ticks, 0,
	    "The maximum time (in ticks) that a connection can be idle "
	    "before its LRO state is discarded");

/* Number of packets with payload that must arrive in-order before a
 * connection is eligible for LRO.  The idea is we should avoid coalescing
 * segments when the sender is in slow-start because reducing the ACK rate
 * can damage performance.
 */
static int lro_slow_start_packets = 2000;
TUNABLE_INT(SFXGE_LRO_PARAM(slow_start_packets), &lro_slow_start_packets);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, slow_start_packets, CTLFLAG_RDTUN,
	    &lro_slow_start_packets, 0,
	    "Number of packets with payload that must arrive in-order before "
	    "a connection is eligible for LRO");

/* Number of packets with payload that must arrive in-order following loss
 * before a connection is eligible for LRO.  The idea is we should avoid
 * coalescing segments when the sender is recovering from loss, because
 * reducing the ACK rate can damage performance.
 */
static int lro_loss_packets = 20;
TUNABLE_INT(SFXGE_LRO_PARAM(loss_packets), &lro_loss_packets);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, loss_packets, CTLFLAG_RDTUN,
	    &lro_loss_packets, 0,
	    "Number of packets with payload that must arrive in-order "
	    "following loss before a connection is eligible for LRO");

/* Flags for sfxge_lro_conn::l2_id; must not collide with EVL_VLID_MASK */
#define	SFXGE_LRO_L2_ID_VLAN 0x4000
#define	SFXGE_LRO_L2_ID_IPV6 0x8000
#define	SFXGE_LRO_CONN_IS_VLAN_ENCAP(c) ((c)->l2_id & SFXGE_LRO_L2_ID_VLAN)
#define	SFXGE_LRO_CONN_IS_TCPIPV4(c) (!((c)->l2_id & SFXGE_LRO_L2_ID_IPV6))

/* Compare IPv6 addresses, avoiding conditional branches */
static unsigned long ipv6_addr_cmp(const struct in6_addr *left,
				   const struct in6_addr *right)
{
#if LONG_BIT == 64
	const uint64_t *left64 = (const uint64_t *)left;
	const uint64_t *right64 = (const uint64_t *)right;
	return (left64[0] - right64[0]) | (left64[1] - right64[1]);
#else
	return (left->s6_addr32[0] - right->s6_addr32[0]) |
	       (left->s6_addr32[1] - right->s6_addr32[1]) |
	       (left->s6_addr32[2] - right->s6_addr32[2]) |
	       (left->s6_addr32[3] - right->s6_addr32[3]);
#endif
}

#endif	/* SFXGE_LRO */

void
sfxge_rx_qflush_done(struct sfxge_rxq *rxq)
{

	rxq->flush_state = SFXGE_FLUSH_DONE;
}

void
sfxge_rx_qflush_failed(struct sfxge_rxq *rxq)
{

	rxq->flush_state = SFXGE_FLUSH_FAILED;
}

static uint8_t toep_key[] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
};
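/* toep_key above appears to be the standard 40-byte Toeplitz hash key
 * published in the Microsoft RSS specification, which is commonly used as a
 * default RSS key; it is programmed into the NIC in sfxge_rx_start().
 */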
started")); 185 efx_ev_qpost(evq->common, magic); 186} 187 188static void 189sfxge_rx_schedule_refill(struct sfxge_rxq *rxq, boolean_t retrying) 190{ 191 /* Initially retry after 100 ms, but back off in case of 192 * repeated failures as we probably have to wait for the 193 * administrator to raise the pool limit. */ 194 if (retrying) 195 rxq->refill_delay = min(rxq->refill_delay * 2, 10 * hz); 196 else 197 rxq->refill_delay = hz / 10; 198 199 callout_reset_curcpu(&rxq->refill_callout, rxq->refill_delay, 200 sfxge_rx_post_refill, rxq); 201} 202 203static struct mbuf *sfxge_rx_alloc_mbuf(struct sfxge_softc *sc) 204{ 205 struct mb_args args; 206 struct mbuf *m; 207 208 /* Allocate mbuf structure */ 209 args.flags = M_PKTHDR; 210 args.type = MT_DATA; 211 m = (struct mbuf *)uma_zalloc_arg(zone_mbuf, &args, M_NOWAIT); 212 213 /* Allocate (and attach) packet buffer */ 214 if (m != NULL && !uma_zalloc_arg(sc->rx_buffer_zone, m, M_NOWAIT)) { 215 uma_zfree(zone_mbuf, m); 216 m = NULL; 217 } 218 219 return (m); 220} 221 222#define SFXGE_REFILL_BATCH 64 223 224static void 225sfxge_rx_qfill(struct sfxge_rxq *rxq, unsigned int target, boolean_t retrying) 226{ 227 struct sfxge_softc *sc; 228 unsigned int index; 229 struct sfxge_evq *evq; 230 unsigned int batch; 231 unsigned int rxfill; 232 unsigned int mblksize; 233 int ntodo; 234 efsys_dma_addr_t addr[SFXGE_REFILL_BATCH]; 235 236 sc = rxq->sc; 237 index = rxq->index; 238 evq = sc->evq[index]; 239 240 prefetch_read_many(sc->enp); 241 prefetch_read_many(rxq->common); 242 243 SFXGE_EVQ_LOCK_ASSERT_OWNED(evq); 244 245 if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED)) 246 return; 247 248 rxfill = rxq->added - rxq->completed; 249 KASSERT(rxfill <= EFX_RXQ_LIMIT(rxq->entries), 250 ("rxfill > EFX_RXQ_LIMIT(rxq->entries)")); 251 ntodo = min(EFX_RXQ_LIMIT(rxq->entries) - rxfill, target); 252 KASSERT(ntodo <= EFX_RXQ_LIMIT(rxq->entries), 253 ("ntodo > EFX_RQX_LIMIT(rxq->entries)")); 254 255 if (ntodo == 0) 256 return; 257 258 batch = 0; 259 mblksize = sc->rx_buffer_size; 260 while (ntodo-- > 0) { 261 unsigned int id; 262 struct sfxge_rx_sw_desc *rx_desc; 263 bus_dma_segment_t seg; 264 struct mbuf *m; 265 266 id = (rxq->added + batch) & rxq->ptr_mask; 267 rx_desc = &rxq->queue[id]; 268 KASSERT(rx_desc->mbuf == NULL, ("rx_desc->mbuf != NULL")); 269 270 rx_desc->flags = EFX_DISCARD; 271 m = rx_desc->mbuf = sfxge_rx_alloc_mbuf(sc); 272 if (m == NULL) 273 break; 274 sfxge_map_mbuf_fast(rxq->mem.esm_tag, rxq->mem.esm_map, m, &seg); 275 addr[batch++] = seg.ds_addr; 276 277 if (batch == SFXGE_REFILL_BATCH) { 278 efx_rx_qpost(rxq->common, addr, mblksize, batch, 279 rxq->completed, rxq->added); 280 rxq->added += batch; 281 batch = 0; 282 } 283 } 284 285 if (ntodo != 0) 286 sfxge_rx_schedule_refill(rxq, retrying); 287 288 if (batch != 0) { 289 efx_rx_qpost(rxq->common, addr, mblksize, batch, 290 rxq->completed, rxq->added); 291 rxq->added += batch; 292 } 293 294 /* Make the descriptors visible to the hardware */ 295 bus_dmamap_sync(rxq->mem.esm_tag, rxq->mem.esm_map, 296 BUS_DMASYNC_PREWRITE); 297 298 efx_rx_qpush(rxq->common, rxq->added); 299} 300 301void 302sfxge_rx_qrefill(struct sfxge_rxq *rxq) 303{ 304 305 if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED)) 306 return; 307 308 /* Make sure the queue is full */ 309 sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_TRUE); 310} 311 312static void __sfxge_rx_deliver(struct sfxge_softc *sc, struct mbuf *m) 313{ 314 struct ifnet *ifp = sc->ifnet; 315 316 m->m_pkthdr.rcvif = ifp; 317 m->m_pkthdr.csum_data = 
	m->m_pkthdr.csum_data = 0xffff;
	ifp->if_input(ifp, m);
}

static void
sfxge_rx_deliver(struct sfxge_softc *sc, struct sfxge_rx_sw_desc *rx_desc)
{
	struct mbuf *m = rx_desc->mbuf;
	int flags = rx_desc->flags;
	int csum_flags;

	/* Convert checksum flags */
	csum_flags = (flags & EFX_CKSUM_IPV4) ?
		(CSUM_IP_CHECKED | CSUM_IP_VALID) : 0;
	if (flags & EFX_CKSUM_TCPUDP)
		csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;

	if (flags & (EFX_PKT_IPV4 | EFX_PKT_IPV6)) {
		m->m_pkthdr.flowid = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ,
						       mtod(m, uint8_t *));
		/* The hash covers a 4-tuple for TCP only */
		M_HASHTYPE_SET(m,
		    (flags & EFX_PKT_IPV4) ?
			((flags & EFX_PKT_TCP) ?
			    M_HASHTYPE_RSS_TCP_IPV4 : M_HASHTYPE_RSS_IPV4) :
			((flags & EFX_PKT_TCP) ?
			    M_HASHTYPE_RSS_TCP_IPV6 : M_HASHTYPE_RSS_IPV6));
	}
	m->m_data += sc->rx_prefix_size;
	m->m_len = rx_desc->size - sc->rx_prefix_size;
	m->m_pkthdr.len = m->m_len;
	m->m_pkthdr.csum_flags = csum_flags;
	__sfxge_rx_deliver(sc, rx_desc->mbuf);

	rx_desc->flags = EFX_DISCARD;
	rx_desc->mbuf = NULL;
}

#ifdef SFXGE_LRO

static void
sfxge_lro_deliver(struct sfxge_lro_state *st, struct sfxge_lro_conn *c)
{
	struct sfxge_softc *sc = st->sc;
	struct mbuf *m = c->mbuf;
	struct tcphdr *c_th;
	int csum_flags;

	KASSERT(m, ("no mbuf to deliver"));

	++st->n_bursts;

	/* Finish off packet munging and recalculate IP header checksum. */
	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = c->nh;
		iph->ip_len = htons(iph->ip_len);
		iph->ip_sum = 0;
		iph->ip_sum = in_cksum_hdr(iph);
		c_th = (struct tcphdr *)(iph + 1);
		csum_flags = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
			      CSUM_IP_CHECKED | CSUM_IP_VALID);
	} else {
		struct ip6_hdr *iph = c->nh;
		iph->ip6_plen = htons(iph->ip6_plen);
		c_th = (struct tcphdr *)(iph + 1);
		csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
	}

	c_th->th_win = c->th_last->th_win;
	c_th->th_ack = c->th_last->th_ack;
	if (c_th->th_off == c->th_last->th_off) {
		/* Copy TCP options (take care to avoid going negative). */
		int optlen = ((c_th->th_off - 5) & 0xf) << 2u;
		memcpy(c_th + 1, c->th_last + 1, optlen);
	}

	m->m_pkthdr.flowid = c->conn_hash;
	M_HASHTYPE_SET(m,
	    SFXGE_LRO_CONN_IS_TCPIPV4(c) ?
		M_HASHTYPE_RSS_TCP_IPV4 : M_HASHTYPE_RSS_TCP_IPV6);

	m->m_pkthdr.csum_flags = csum_flags;
	__sfxge_rx_deliver(sc, m);

	c->mbuf = NULL;
	c->delivered = 1;
}

/* Drop the given connection, and add it to the free list. */
static void sfxge_lro_drop(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
{
	unsigned bucket;

	KASSERT(!c->mbuf, ("found orphaned mbuf"));

	if (c->next_buf.mbuf != NULL) {
		sfxge_rx_deliver(rxq->sc, &c->next_buf);
		LIST_REMOVE(c, active_link);
	}

	bucket = c->conn_hash & rxq->lro.conns_mask;
	KASSERT(rxq->lro.conns_n[bucket] > 0, ("LRO: bucket fill level wrong"));
	--rxq->lro.conns_n[bucket];
	TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
	TAILQ_INSERT_HEAD(&rxq->lro.free_conns, c, link);
}

/* Stop tracking connections that have gone idle in order to keep hash
 * chains short.
 */
static void sfxge_lro_purge_idle(struct sfxge_rxq *rxq, unsigned now)
{
	struct sfxge_lro_conn *c;
	unsigned i;

	KASSERT(LIST_EMPTY(&rxq->lro.active_conns),
	    ("found active connections"));

	rxq->lro.last_purge_ticks = now;
	for (i = 0; i <= rxq->lro.conns_mask; ++i) {
		if (TAILQ_EMPTY(&rxq->lro.conns[i]))
			continue;

		c = TAILQ_LAST(&rxq->lro.conns[i], sfxge_lro_tailq);
		if (now - c->last_pkt_ticks > lro_idle_ticks) {
			++rxq->lro.n_drop_idle;
			sfxge_lro_drop(rxq, c);
		}
	}
}

static void
sfxge_lro_merge(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
		struct mbuf *mbuf, struct tcphdr *th)
{
	struct tcphdr *c_th;

	/* Tack the new mbuf onto the chain. */
	KASSERT(!mbuf->m_next, ("mbuf already chained"));
	c->mbuf_tail->m_next = mbuf;
	c->mbuf_tail = mbuf;

	/* Increase length appropriately */
	c->mbuf->m_pkthdr.len += mbuf->m_len;

	/* Update the connection state flags */
	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = c->nh;
		iph->ip_len += mbuf->m_len;
		c_th = (struct tcphdr *)(iph + 1);
	} else {
		struct ip6_hdr *iph = c->nh;
		iph->ip6_plen += mbuf->m_len;
		c_th = (struct tcphdr *)(iph + 1);
	}
	c_th->th_flags |= (th->th_flags & TH_PUSH);
	c->th_last = th;
	++st->n_merges;

	/* Pass packet up now if another segment could overflow the IP
	 * length.
	 */
	if (c->mbuf->m_pkthdr.len > 65536 - 9200)
		sfxge_lro_deliver(st, c);
}

static void
sfxge_lro_start(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
		struct mbuf *mbuf, void *nh, struct tcphdr *th)
{
	/* Start the chain */
	c->mbuf = mbuf;
	c->mbuf_tail = c->mbuf;
	c->nh = nh;
	c->th_last = th;

	mbuf->m_pkthdr.len = mbuf->m_len;

	/* Mangle header fields for later processing */
	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = nh;
		iph->ip_len = ntohs(iph->ip_len);
	} else {
		struct ip6_hdr *iph = nh;
		iph->ip6_plen = ntohs(iph->ip6_plen);
	}
}

/* Try to merge or otherwise hold or deliver (as appropriate) the
 * packet buffered for this connection (c->next_buf).  Return a flag
 * indicating whether the connection is still active for LRO purposes.
 */
static int
sfxge_lro_try_merge(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
{
	struct sfxge_rx_sw_desc *rx_buf = &c->next_buf;
	char *eh = c->next_eh;
	int data_length, hdr_length, dont_merge;
	unsigned th_seq, pkt_length;
	struct tcphdr *th;
	unsigned now;

	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = c->next_nh;
		th = (struct tcphdr *)(iph + 1);
		pkt_length = ntohs(iph->ip_len) + (char *) iph - eh;
	} else {
		struct ip6_hdr *iph = c->next_nh;
		th = (struct tcphdr *)(iph + 1);
		pkt_length = ntohs(iph->ip6_plen) + (char *) th - eh;
	}

	hdr_length = (char *) th + th->th_off * 4 - eh;
	data_length = (min(pkt_length, rx_buf->size - rxq->sc->rx_prefix_size) -
		       hdr_length);
	th_seq = ntohl(th->th_seq);
	dont_merge = ((data_length <= 0)
		      | (th->th_flags & (TH_URG | TH_SYN | TH_RST | TH_FIN)));
	/* Check for options other than aligned timestamp. */
	if (th->th_off != 5) {
		const uint32_t *opt_ptr = (const uint32_t *) (th + 1);
		if (th->th_off == 8 &&
		    opt_ptr[0] == ntohl((TCPOPT_NOP << 24) |
					(TCPOPT_NOP << 16) |
					(TCPOPT_TIMESTAMP << 8) |
					TCPOLEN_TIMESTAMP)) {
			/* timestamp option -- okay */
		} else {
			dont_merge = 1;
		}
	}

	if (__predict_false(th_seq != c->next_seq)) {
		/* Out-of-order, so start counting again. */
		if (c->mbuf != NULL)
			sfxge_lro_deliver(&rxq->lro, c);
		c->n_in_order_pkts -= lro_loss_packets;
		c->next_seq = th_seq + data_length;
		++rxq->lro.n_misorder;
		goto deliver_buf_out;
	}
	c->next_seq = th_seq + data_length;

	now = ticks;
	if (now - c->last_pkt_ticks > lro_idle_ticks) {
		++rxq->lro.n_drop_idle;
		if (c->mbuf != NULL)
			sfxge_lro_deliver(&rxq->lro, c);
		sfxge_lro_drop(rxq, c);
		return (0);
	}
	c->last_pkt_ticks = ticks;

	if (c->n_in_order_pkts < lro_slow_start_packets) {
		/* May be in slow-start, so don't merge. */
		++rxq->lro.n_slow_start;
		++c->n_in_order_pkts;
		goto deliver_buf_out;
	}

	if (__predict_false(dont_merge)) {
		if (c->mbuf != NULL)
			sfxge_lro_deliver(&rxq->lro, c);
		if (th->th_flags & (TH_FIN | TH_RST)) {
			++rxq->lro.n_drop_closed;
			sfxge_lro_drop(rxq, c);
			return (0);
		}
		goto deliver_buf_out;
	}

	rx_buf->mbuf->m_data += rxq->sc->rx_prefix_size;

	if (__predict_true(c->mbuf != NULL)) {
		/* Remove headers and any padding */
		rx_buf->mbuf->m_data += hdr_length;
		rx_buf->mbuf->m_len = data_length;

		sfxge_lro_merge(&rxq->lro, c, rx_buf->mbuf, th);
	} else {
		/* Remove any padding */
		rx_buf->mbuf->m_len = pkt_length;

		sfxge_lro_start(&rxq->lro, c, rx_buf->mbuf, c->next_nh, th);
	}

	rx_buf->mbuf = NULL;
	return (1);

 deliver_buf_out:
	sfxge_rx_deliver(rxq->sc, rx_buf);
	return (1);
}

static void sfxge_lro_new_conn(struct sfxge_lro_state *st, uint32_t conn_hash,
			       uint16_t l2_id, void *nh, struct tcphdr *th)
{
	unsigned bucket = conn_hash & st->conns_mask;
	struct sfxge_lro_conn *c;

	if (st->conns_n[bucket] >= lro_chain_max) {
		++st->n_too_many;
		return;
	}

	if (!TAILQ_EMPTY(&st->free_conns)) {
		c = TAILQ_FIRST(&st->free_conns);
		TAILQ_REMOVE(&st->free_conns, c, link);
	} else {
		c = malloc(sizeof(*c), M_SFXGE, M_NOWAIT);
		if (c == NULL)
			return;
		c->mbuf = NULL;
		c->next_buf.mbuf = NULL;
	}

	/* Create the connection tracking data */
	++st->conns_n[bucket];
	TAILQ_INSERT_HEAD(&st->conns[bucket], c, link);
	c->l2_id = l2_id;
	c->conn_hash = conn_hash;
	c->source = th->th_sport;
	c->dest = th->th_dport;
	c->n_in_order_pkts = 0;
	c->last_pkt_ticks = *(volatile int *)&ticks;
	c->delivered = 0;
	++st->n_new_stream;
	/* NB. We don't initialise c->next_seq, and it doesn't matter what
	 * value it has.  Most likely the next packet received for this
	 * connection will not match -- no harm done.
	 */
}

/* Process mbuf and decide whether to dispatch it to the stack now or
 * later.
 */
static void
sfxge_lro(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_buf)
{
	struct sfxge_softc *sc = rxq->sc;
	struct mbuf *m = rx_buf->mbuf;
	struct ether_header *eh;
	struct sfxge_lro_conn *c;
	uint16_t l2_id;
	uint16_t l3_proto;
	void *nh;
	struct tcphdr *th;
	uint32_t conn_hash;
	unsigned bucket;

	/* Get the hardware hash */
	conn_hash = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ,
				      mtod(m, uint8_t *));

	eh = (struct ether_header *)(m->m_data + sc->rx_prefix_size);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		struct ether_vlan_header *veh = (struct ether_vlan_header *)eh;
		l2_id = EVL_VLANOFTAG(ntohs(veh->evl_tag)) |
			SFXGE_LRO_L2_ID_VLAN;
		l3_proto = veh->evl_proto;
		nh = veh + 1;
	} else {
		l2_id = 0;
		l3_proto = eh->ether_type;
		nh = eh + 1;
	}

	/* Check whether this is a suitable packet (unfragmented
	 * TCP/IPv4 or TCP/IPv6).  If so, find the TCP header and
	 * length, and compute a hash if necessary.  If not, return.
	 */
	if (l3_proto == htons(ETHERTYPE_IP)) {
		struct ip *iph = nh;

		KASSERT(iph->ip_p == IPPROTO_TCP,
		    ("IPv4 protocol is not TCP, but packet marker is set"));
		if ((iph->ip_hl - (sizeof(*iph) >> 2u)) |
		    (iph->ip_off & htons(IP_MF | IP_OFFMASK)))
			goto deliver_now;
		th = (struct tcphdr *)(iph + 1);
	} else if (l3_proto == htons(ETHERTYPE_IPV6)) {
		struct ip6_hdr *iph = nh;

		KASSERT(iph->ip6_nxt == IPPROTO_TCP,
		    ("IPv6 next header is not TCP, but packet marker is set"));
		l2_id |= SFXGE_LRO_L2_ID_IPV6;
		th = (struct tcphdr *)(iph + 1);
	} else {
		goto deliver_now;
	}

	bucket = conn_hash & rxq->lro.conns_mask;

	TAILQ_FOREACH(c, &rxq->lro.conns[bucket], link) {
		if ((c->l2_id - l2_id) | (c->conn_hash - conn_hash))
			continue;
		if ((c->source - th->th_sport) | (c->dest - th->th_dport))
			continue;
		if (c->mbuf != NULL) {
			if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
				struct ip *c_iph, *iph = nh;
				c_iph = c->nh;
				if ((c_iph->ip_src.s_addr - iph->ip_src.s_addr) |
				    (c_iph->ip_dst.s_addr - iph->ip_dst.s_addr))
					continue;
			} else {
				struct ip6_hdr *c_iph, *iph = nh;
				c_iph = c->nh;
				if (ipv6_addr_cmp(&c_iph->ip6_src, &iph->ip6_src) |
				    ipv6_addr_cmp(&c_iph->ip6_dst, &iph->ip6_dst))
					continue;
			}
		}
		/* Re-insert at head of list to reduce lookup time. */
		TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
		TAILQ_INSERT_HEAD(&rxq->lro.conns[bucket], c, link);

		if (c->next_buf.mbuf != NULL) {
			if (!sfxge_lro_try_merge(rxq, c))
				goto deliver_now;
		} else {
			LIST_INSERT_HEAD(&rxq->lro.active_conns, c,
			    active_link);
		}
		c->next_buf = *rx_buf;
		c->next_eh = eh;
		c->next_nh = nh;

		rx_buf->mbuf = NULL;
		rx_buf->flags = EFX_DISCARD;
		return;
	}

	sfxge_lro_new_conn(&rxq->lro, conn_hash, l2_id, nh, th);
 deliver_now:
	sfxge_rx_deliver(sc, rx_buf);
}

static void sfxge_lro_end_of_burst(struct sfxge_rxq *rxq)
{
	struct sfxge_lro_state *st = &rxq->lro;
	struct sfxge_lro_conn *c;
	unsigned t;

	while (!LIST_EMPTY(&st->active_conns)) {
		c = LIST_FIRST(&st->active_conns);
		if (!c->delivered && c->mbuf != NULL)
			sfxge_lro_deliver(st, c);
		if (sfxge_lro_try_merge(rxq, c)) {
			if (c->mbuf != NULL)
				sfxge_lro_deliver(st, c);
			LIST_REMOVE(c, active_link);
		}
		c->delivered = 0;
	}

	t = *(volatile int *)&ticks;
	if (__predict_false(t != st->last_purge_ticks))
		sfxge_lro_purge_idle(rxq, t);
}

#else	/* !SFXGE_LRO */

static void
sfxge_lro(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_buf)
{
}

static void
sfxge_lro_end_of_burst(struct sfxge_rxq *rxq)
{
}

#endif	/* SFXGE_LRO */

void
sfxge_rx_qcomplete(struct sfxge_rxq *rxq, boolean_t eop)
{
	struct sfxge_softc *sc = rxq->sc;
	int lro_enabled = sc->ifnet->if_capenable & IFCAP_LRO;
	unsigned int index;
	struct sfxge_evq *evq;
	unsigned int completed;
	unsigned int level;
	struct mbuf *m;
	struct sfxge_rx_sw_desc *prev = NULL;

	index = rxq->index;
	evq = sc->evq[index];

	SFXGE_EVQ_LOCK_ASSERT_OWNED(evq);

	completed = rxq->completed;
	while (completed != rxq->pending) {
		unsigned int id;
		struct sfxge_rx_sw_desc *rx_desc;

		id = completed++ & rxq->ptr_mask;
		rx_desc = &rxq->queue[id];
		m = rx_desc->mbuf;

		if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED))
			goto discard;

		if (rx_desc->flags & (EFX_ADDR_MISMATCH | EFX_DISCARD))
			goto discard;

		prefetch_read_many(mtod(m, caddr_t));

		/* Check for loopback packets */
		if (!(rx_desc->flags & EFX_PKT_IPV4) &&
		    !(rx_desc->flags & EFX_PKT_IPV6)) {
			struct ether_header *etherhp;

			/*LINTED*/
			etherhp = mtod(m, struct ether_header *);

			if (etherhp->ether_type ==
			    htons(SFXGE_ETHERTYPE_LOOPBACK)) {
				EFSYS_PROBE(loopback);

				rxq->loopback++;
				goto discard;
			}
		}

		/* Pass packet up the stack or into LRO (pipelined) */
		if (prev != NULL) {
			if (lro_enabled &&
			    ((prev->flags & (EFX_PKT_TCP | EFX_CKSUM_TCPUDP)) ==
			     (EFX_PKT_TCP | EFX_CKSUM_TCPUDP)))
				sfxge_lro(rxq, prev);
			else
				sfxge_rx_deliver(sc, prev);
		}
		prev = rx_desc;
		continue;

discard:
		/* Return the packet to the pool */
		m_free(m);
		rx_desc->mbuf = NULL;
	}
	rxq->completed = completed;

	level = rxq->added - rxq->completed;

	/* Pass last packet up the stack or into LRO */
	if (prev != NULL) {
		if (lro_enabled &&
		    ((prev->flags & (EFX_PKT_TCP | EFX_CKSUM_TCPUDP)) ==
		     (EFX_PKT_TCP | EFX_CKSUM_TCPUDP)))
			sfxge_lro(rxq, prev);
		else
			sfxge_rx_deliver(sc, prev);
	}

	/*
	 * If there are any pending flows and this is the end of the
	 * poll then they must be completed.
	 */
	if (eop)
		sfxge_lro_end_of_burst(rxq);

	/* Top up the queue if necessary */
	if (level < rxq->refill_threshold)
		sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_FALSE);
}

static void
sfxge_rx_qstop(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;
	struct sfxge_evq *evq;
	unsigned int count;

	rxq = sc->rxq[index];
	evq = sc->evq[index];

	SFXGE_EVQ_LOCK(evq);

	KASSERT(rxq->init_state == SFXGE_RXQ_STARTED,
	    ("rxq not started"));

	rxq->init_state = SFXGE_RXQ_INITIALIZED;

	callout_stop(&rxq->refill_callout);

again:
	rxq->flush_state = SFXGE_FLUSH_PENDING;

	/* Flush the receive queue */
	efx_rx_qflush(rxq->common);

	SFXGE_EVQ_UNLOCK(evq);

	count = 0;
	do {
		/* Spin for 100 ms */
		DELAY(100000);

		if (rxq->flush_state != SFXGE_FLUSH_PENDING)
			break;

	} while (++count < 20);

	SFXGE_EVQ_LOCK(evq);

	if (rxq->flush_state == SFXGE_FLUSH_FAILED)
		goto again;

	rxq->flush_state = SFXGE_FLUSH_DONE;

	rxq->pending = rxq->added;
	sfxge_rx_qcomplete(rxq, B_TRUE);

	KASSERT(rxq->completed == rxq->pending,
	    ("rxq->completed != rxq->pending"));

	rxq->added = 0;
	rxq->pending = 0;
	rxq->completed = 0;
	rxq->loopback = 0;

	/* Destroy the common code receive queue. */
	efx_rx_qdestroy(rxq->common);

	efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
			       EFX_RXQ_NBUFS(sc->rxq_entries));

	SFXGE_EVQ_UNLOCK(evq);
}

static int
sfxge_rx_qstart(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;
	efsys_mem_t *esmp;
	struct sfxge_evq *evq;
	int rc;

	rxq = sc->rxq[index];
	esmp = &rxq->mem;
	evq = sc->evq[index];

	KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
	    ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));
	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
	    ("evq->init_state != SFXGE_EVQ_STARTED"));

	/* Program the buffer table. */
	if ((rc = efx_sram_buf_tbl_set(sc->enp, rxq->buf_base_id, esmp,
	    EFX_RXQ_NBUFS(sc->rxq_entries))) != 0)
		return (rc);

	/* Create the common code receive queue. */
	if ((rc = efx_rx_qcreate(sc->enp, index, index, EFX_RXQ_TYPE_DEFAULT,
	    esmp, sc->rxq_entries, rxq->buf_base_id, evq->common,
	    &rxq->common)) != 0)
		goto fail;

	SFXGE_EVQ_LOCK(evq);

	/* Enable the receive queue. */
	efx_rx_qenable(rxq->common);

	rxq->init_state = SFXGE_RXQ_STARTED;

	/* Try to fill the queue from the pool. */
	sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(sc->rxq_entries), B_FALSE);

	SFXGE_EVQ_UNLOCK(evq);

	return (0);

fail:
	efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
			       EFX_RXQ_NBUFS(sc->rxq_entries));
	return (rc);
}

void
sfxge_rx_stop(struct sfxge_softc *sc)
{
	int index;

	/* Stop the receive queue(s) */
	index = sc->rxq_count;
	while (--index >= 0)
		sfxge_rx_qstop(sc, index);

	sc->rx_prefix_size = 0;
	sc->rx_buffer_size = 0;

	efx_rx_fini(sc->enp);
}

int
sfxge_rx_start(struct sfxge_softc *sc)
{
	struct sfxge_intr *intr;
	int index;
	int rc;

	intr = &sc->intr;

	/* Initialize the common code receive module. */
	if ((rc = efx_rx_init(sc->enp)) != 0)
		return (rc);
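	/* The hardware prepends a prefix of rx_prefix_size bytes to each
	 * received packet; the Toeplitz hash appears to be read from this
	 * prefix (see EFX_RX_HASH_VALUE in sfxge_rx_deliver() and
	 * sfxge_lro()), and the prefix is stripped before the mbuf is
	 * passed up the stack.
	 */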
	/* Calculate the receive packet buffer size. */
	sc->rx_prefix_size = EFX_RX_PREFIX_SIZE;
	sc->rx_buffer_size = (EFX_MAC_PDU(sc->ifnet->if_mtu) +
			      sc->rx_prefix_size);

	/* Select zone for packet buffers */
	if (sc->rx_buffer_size <= MCLBYTES)
		sc->rx_buffer_zone = zone_clust;
	else if (sc->rx_buffer_size <= MJUMPAGESIZE)
		sc->rx_buffer_zone = zone_jumbop;
	else if (sc->rx_buffer_size <= MJUM9BYTES)
		sc->rx_buffer_zone = zone_jumbo9;
	else
		sc->rx_buffer_zone = zone_jumbo16;

	/*
	 * Set up the scale table.  Enable all hash types and hash insertion.
	 */
	for (index = 0; index < SFXGE_RX_SCALE_MAX; index++)
		sc->rx_indir_table[index] = index % sc->rxq_count;
	if ((rc = efx_rx_scale_tbl_set(sc->enp, sc->rx_indir_table,
				       SFXGE_RX_SCALE_MAX)) != 0)
		goto fail;
	(void)efx_rx_scale_mode_set(sc->enp, EFX_RX_HASHALG_TOEPLITZ,
	    (1 << EFX_RX_HASH_IPV4) | (1 << EFX_RX_HASH_TCPIPV4) |
	    (1 << EFX_RX_HASH_IPV6) | (1 << EFX_RX_HASH_TCPIPV6), B_TRUE);

	if ((rc = efx_rx_scale_toeplitz_ipv4_key_set(sc->enp, toep_key,
	    sizeof(toep_key))) != 0)
		goto fail;

	/* Start the receive queue(s). */
	for (index = 0; index < sc->rxq_count; index++) {
		if ((rc = sfxge_rx_qstart(sc, index)) != 0)
			goto fail2;
	}

	return (0);

fail2:
	while (--index >= 0)
		sfxge_rx_qstop(sc, index);

fail:
	efx_rx_fini(sc->enp);

	return (rc);
}

#ifdef SFXGE_LRO

static void sfxge_lro_init(struct sfxge_rxq *rxq)
{
	struct sfxge_lro_state *st = &rxq->lro;
	unsigned i;

	st->conns_mask = lro_table_size - 1;
	KASSERT(!((st->conns_mask + 1) & st->conns_mask),
	    ("lro_table_size must be a power of 2"));
	st->sc = rxq->sc;
	st->conns = malloc((st->conns_mask + 1) * sizeof(st->conns[0]),
			   M_SFXGE, M_WAITOK);
	st->conns_n = malloc((st->conns_mask + 1) * sizeof(st->conns_n[0]),
			     M_SFXGE, M_WAITOK);
	for (i = 0; i <= st->conns_mask; ++i) {
		TAILQ_INIT(&st->conns[i]);
		st->conns_n[i] = 0;
	}
	LIST_INIT(&st->active_conns);
	TAILQ_INIT(&st->free_conns);
}

static void sfxge_lro_fini(struct sfxge_rxq *rxq)
{
	struct sfxge_lro_state *st = &rxq->lro;
	struct sfxge_lro_conn *c;
	unsigned i;

	/* Return cleanly if sfxge_lro_init() has not been called. */
	if (st->conns == NULL)
		return;

	KASSERT(LIST_EMPTY(&st->active_conns), ("found active connections"));

	for (i = 0; i <= st->conns_mask; ++i) {
		while (!TAILQ_EMPTY(&st->conns[i])) {
			c = TAILQ_LAST(&st->conns[i], sfxge_lro_tailq);
			sfxge_lro_drop(rxq, c);
		}
	}

	while (!TAILQ_EMPTY(&st->free_conns)) {
		c = TAILQ_FIRST(&st->free_conns);
		TAILQ_REMOVE(&st->free_conns, c, link);
		KASSERT(!c->mbuf, ("found orphaned mbuf"));
		free(c, M_SFXGE);
	}

	free(st->conns_n, M_SFXGE);
	free(st->conns, M_SFXGE);
	st->conns = NULL;
}

#else

static void
sfxge_lro_init(struct sfxge_rxq *rxq)
{
}

static void
sfxge_lro_fini(struct sfxge_rxq *rxq)
{
}

#endif	/* SFXGE_LRO */

static void
sfxge_rx_qfini(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;

	rxq = sc->rxq[index];

	KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
	    ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));

	/* Free the context array and the flow table. */
	free(rxq->queue, M_SFXGE);
	sfxge_lro_fini(rxq);

	/* Release DMA memory. */
	sfxge_dma_free(&rxq->mem);

	sc->rxq[index] = NULL;

	free(rxq, M_SFXGE);
}

static int
sfxge_rx_qinit(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;
	struct sfxge_evq *evq;
	efsys_mem_t *esmp;
	int rc;

	KASSERT(index < sc->rxq_count, ("index >= %d", sc->rxq_count));

	rxq = malloc(sizeof(struct sfxge_rxq), M_SFXGE, M_ZERO | M_WAITOK);
	rxq->sc = sc;
	rxq->index = index;
	rxq->entries = sc->rxq_entries;
	rxq->ptr_mask = rxq->entries - 1;
	rxq->refill_threshold = RX_REFILL_THRESHOLD(rxq->entries);

	sc->rxq[index] = rxq;
	esmp = &rxq->mem;

	evq = sc->evq[index];

	/* Allocate and zero DMA space. */
	if ((rc = sfxge_dma_alloc(sc, EFX_RXQ_SIZE(sc->rxq_entries), esmp)) != 0)
		return (rc);

	/* Allocate buffer table entries. */
	sfxge_sram_buf_tbl_alloc(sc, EFX_RXQ_NBUFS(sc->rxq_entries),
				 &rxq->buf_base_id);

	/* Allocate the context array and the flow table. */
	rxq->queue = malloc(sizeof(struct sfxge_rx_sw_desc) * sc->rxq_entries,
			    M_SFXGE, M_WAITOK | M_ZERO);
	sfxge_lro_init(rxq);

	callout_init(&rxq->refill_callout, B_TRUE);

	rxq->init_state = SFXGE_RXQ_INITIALIZED;

	return (0);
}

static const struct {
	const char *name;
	size_t offset;
} sfxge_rx_stats[] = {
#define	SFXGE_RX_STAT(name, member) \
	{ #name, offsetof(struct sfxge_rxq, member) }
#ifdef SFXGE_LRO
	SFXGE_RX_STAT(lro_merges, lro.n_merges),
	SFXGE_RX_STAT(lro_bursts, lro.n_bursts),
	SFXGE_RX_STAT(lro_slow_start, lro.n_slow_start),
	SFXGE_RX_STAT(lro_misorder, lro.n_misorder),
	SFXGE_RX_STAT(lro_too_many, lro.n_too_many),
	SFXGE_RX_STAT(lro_new_stream, lro.n_new_stream),
	SFXGE_RX_STAT(lro_drop_idle, lro.n_drop_idle),
	SFXGE_RX_STAT(lro_drop_closed, lro.n_drop_closed)
#endif
};

static int
sfxge_rx_stat_handler(SYSCTL_HANDLER_ARGS)
{
	struct sfxge_softc *sc = arg1;
	unsigned int id = arg2;
	unsigned int sum, index;

	/* Sum across all RX queues */
	sum = 0;
	for (index = 0; index < sc->rxq_count; index++)
		sum += *(unsigned int *)((caddr_t)sc->rxq[index] +
					 sfxge_rx_stats[id].offset);

	return (SYSCTL_OUT(req, &sum, sizeof(sum)));
}

static void
sfxge_rx_stat_init(struct sfxge_softc *sc)
{
	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
	struct sysctl_oid_list *stat_list;
	unsigned int id;

	stat_list = SYSCTL_CHILDREN(sc->stats_node);

	for (id = 0; id < nitems(sfxge_rx_stats); id++) {
		SYSCTL_ADD_PROC(
			ctx, stat_list,
			OID_AUTO, sfxge_rx_stats[id].name,
			CTLTYPE_UINT|CTLFLAG_RD,
			sc, id, sfxge_rx_stat_handler, "IU",
			"");
	}
}
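/* Each statistic above is summed across all RX queues by
 * sfxge_rx_stat_handler() and exported read-only under the adapter's stats
 * sysctl node.  For example (hypothetical unit number, assuming sc->stats_node
 * lives at the usual dev.sfxge.<unit>.stats location):
 *
 *	sysctl dev.sfxge.0.stats.lro_merges
 */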
void
sfxge_rx_fini(struct sfxge_softc *sc)
{
	int index;

	index = sc->rxq_count;
	while (--index >= 0)
		sfxge_rx_qfini(sc, index);

	sc->rxq_count = 0;
}

int
sfxge_rx_init(struct sfxge_softc *sc)
{
	struct sfxge_intr *intr;
	int index;
	int rc;

#ifdef SFXGE_LRO
	if (!ISP2(lro_table_size)) {
		log(LOG_ERR, "%s=%u must be power of 2",
		    SFXGE_LRO_PARAM(table_size), lro_table_size);
		rc = EINVAL;
		goto fail_lro_table_size;
	}

	if (lro_idle_ticks == 0)
		lro_idle_ticks = hz / 10 + 1;	/* 100 ms */
#endif

	intr = &sc->intr;

	sc->rxq_count = intr->n_alloc;

	KASSERT(intr->state == SFXGE_INTR_INITIALIZED,
	    ("intr->state != SFXGE_INTR_INITIALIZED"));

	/* Initialize the receive queue(s) - one per interrupt. */
	for (index = 0; index < sc->rxq_count; index++) {
		if ((rc = sfxge_rx_qinit(sc, index)) != 0)
			goto fail;
	}

	sfxge_rx_stat_init(sc);

	return (0);

fail:
	/* Tear down the receive queue(s). */
	while (--index >= 0)
		sfxge_rx_qfini(sc, index);

	sc->rxq_count = 0;

#ifdef SFXGE_LRO
fail_lro_table_size:
#endif
	return (rc);
}