sfxge_rx.c revision 280376
/*-
 * Copyright (c) 2010-2011 Solarflare Communications, Inc.
 * All rights reserved.
 *
 * This software was developed in part by Philip Paeps under contract for
 * Solarflare Communications, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/sfxge/sfxge_rx.c 280376 2015-03-23 15:47:37Z arybchik $");

#include <sys/types.h>
#include <sys/mbuf.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/limits.h>
#include <sys/syslog.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>

#include <machine/in_cksum.h>

#include "common/efx.h"

#include "sfxge.h"
#include "sfxge_rx.h"

#define	RX_REFILL_THRESHOLD(_entries)	(EFX_RXQ_LIMIT(_entries) * 9 / 10)

#ifdef SFXGE_LRO

SYSCTL_NODE(_hw_sfxge, OID_AUTO, lro, CTLFLAG_RD, NULL,
    "Large receive offload (LRO) parameters");

#define	SFXGE_LRO_PARAM(_param)	SFXGE_PARAM(lro._param)

/* Size of the LRO hash table.  Must be a power of 2.  A larger table
 * means we can accelerate a larger number of streams.
 */
static unsigned lro_table_size = 128;
TUNABLE_INT(SFXGE_LRO_PARAM(table_size), &lro_table_size);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, table_size, CTLFLAG_RDTUN,
    &lro_table_size, 0,
    "Size of the LRO hash table (must be a power of 2)");

/* Maximum length of a hash chain.  If chains get too long then the lookup
 * time increases and may exceed the benefit of LRO.
 */
static unsigned lro_chain_max = 20;
TUNABLE_INT(SFXGE_LRO_PARAM(chain_max), &lro_chain_max);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, chain_max, CTLFLAG_RDTUN,
    &lro_chain_max, 0,
    "The maximum length of a hash chain");
/* Maximum time (in ticks) that a connection can be idle before its LRO
 * state is discarded.
 */
static unsigned lro_idle_ticks; /* initialised in sfxge_rx_init() */
TUNABLE_INT(SFXGE_LRO_PARAM(idle_ticks), &lro_idle_ticks);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, idle_ticks, CTLFLAG_RDTUN,
    &lro_idle_ticks, 0,
    "The maximum time (in ticks) that a connection can be idle "
    "before its LRO state is discarded");

/* Number of packets with payload that must arrive in-order before a
 * connection is eligible for LRO.  The idea is we should avoid coalescing
 * segments when the sender is in slow-start because reducing the ACK rate
 * can damage performance.
 */
static int lro_slow_start_packets = 2000;
TUNABLE_INT(SFXGE_LRO_PARAM(slow_start_packets), &lro_slow_start_packets);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, slow_start_packets, CTLFLAG_RDTUN,
    &lro_slow_start_packets, 0,
    "Number of packets with payload that must arrive in-order before "
    "a connection is eligible for LRO");

/* Number of packets with payload that must arrive in-order following loss
 * before a connection is eligible for LRO.  The idea is we should avoid
 * coalescing segments when the sender is recovering from loss, because
 * reducing the ACK rate can damage performance.
 */
static int lro_loss_packets = 20;
TUNABLE_INT(SFXGE_LRO_PARAM(loss_packets), &lro_loss_packets);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, loss_packets, CTLFLAG_RDTUN,
    &lro_loss_packets, 0,
    "Number of packets with payload that must arrive in-order "
    "following loss before a connection is eligible for LRO");

/* Flags for sfxge_lro_conn::l2_id; must not collide with EVL_VLID_MASK */
#define	SFXGE_LRO_L2_ID_VLAN 0x4000
#define	SFXGE_LRO_L2_ID_IPV6 0x8000
#define	SFXGE_LRO_CONN_IS_VLAN_ENCAP(c) ((c)->l2_id & SFXGE_LRO_L2_ID_VLAN)
#define	SFXGE_LRO_CONN_IS_TCPIPV4(c) (!((c)->l2_id & SFXGE_LRO_L2_ID_IPV6))

/* Compare IPv6 addresses, avoiding conditional branches */
static unsigned long ipv6_addr_cmp(const struct in6_addr *left,
				   const struct in6_addr *right)
{
#if LONG_BIT == 64
	const uint64_t *left64 = (const uint64_t *)left;
	const uint64_t *right64 = (const uint64_t *)right;
	return (left64[0] - right64[0]) | (left64[1] - right64[1]);
#else
	return (left->s6_addr32[0] - right->s6_addr32[0]) |
	       (left->s6_addr32[1] - right->s6_addr32[1]) |
	       (left->s6_addr32[2] - right->s6_addr32[2]) |
	       (left->s6_addr32[3] - right->s6_addr32[3]);
#endif
}

#endif	/* SFXGE_LRO */

void
sfxge_rx_qflush_done(struct sfxge_rxq *rxq)
{

	rxq->flush_state = SFXGE_FLUSH_DONE;
}

void
sfxge_rx_qflush_failed(struct sfxge_rxq *rxq)
{

	rxq->flush_state = SFXGE_FLUSH_FAILED;
}

static uint8_t toep_key[] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
};
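
/*
 * Note: the 40-byte key above appears to be the well-known sample Toeplitz
 * key from the Microsoft RSS specification, so hash values computed by the
 * hardware should match the published RSS verification vectors.
 */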
started")); 185 efx_ev_qpost(evq->common, magic); 186} 187 188static void 189sfxge_rx_schedule_refill(struct sfxge_rxq *rxq, boolean_t retrying) 190{ 191 /* Initially retry after 100 ms, but back off in case of 192 * repeated failures as we probably have to wait for the 193 * administrator to raise the pool limit. */ 194 if (retrying) 195 rxq->refill_delay = min(rxq->refill_delay * 2, 10 * hz); 196 else 197 rxq->refill_delay = hz / 10; 198 199 callout_reset_curcpu(&rxq->refill_callout, rxq->refill_delay, 200 sfxge_rx_post_refill, rxq); 201} 202 203static struct mbuf *sfxge_rx_alloc_mbuf(struct sfxge_softc *sc) 204{ 205 struct mb_args args; 206 struct mbuf *m; 207 208 /* Allocate mbuf structure */ 209 args.flags = M_PKTHDR; 210 args.type = MT_DATA; 211 m = (struct mbuf *)uma_zalloc_arg(zone_mbuf, &args, M_NOWAIT); 212 213 /* Allocate (and attach) packet buffer */ 214 if (m != NULL && !uma_zalloc_arg(sc->rx_buffer_zone, m, M_NOWAIT)) { 215 uma_zfree(zone_mbuf, m); 216 m = NULL; 217 } 218 219 return (m); 220} 221 222#define SFXGE_REFILL_BATCH 64 223 224static void 225sfxge_rx_qfill(struct sfxge_rxq *rxq, unsigned int target, boolean_t retrying) 226{ 227 struct sfxge_softc *sc; 228 unsigned int index; 229 struct sfxge_evq *evq; 230 unsigned int batch; 231 unsigned int rxfill; 232 unsigned int mblksize; 233 int ntodo; 234 efsys_dma_addr_t addr[SFXGE_REFILL_BATCH]; 235 236 sc = rxq->sc; 237 index = rxq->index; 238 evq = sc->evq[index]; 239 240 prefetch_read_many(sc->enp); 241 prefetch_read_many(rxq->common); 242 243 SFXGE_EVQ_LOCK_ASSERT_OWNED(evq); 244 245 if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED)) 246 return; 247 248 rxfill = rxq->added - rxq->completed; 249 KASSERT(rxfill <= EFX_RXQ_LIMIT(rxq->entries), 250 ("rxfill > EFX_RXQ_LIMIT(rxq->entries)")); 251 ntodo = min(EFX_RXQ_LIMIT(rxq->entries) - rxfill, target); 252 KASSERT(ntodo <= EFX_RXQ_LIMIT(rxq->entries), 253 ("ntodo > EFX_RQX_LIMIT(rxq->entries)")); 254 255 if (ntodo == 0) 256 return; 257 258 batch = 0; 259 mblksize = sc->rx_buffer_size; 260 while (ntodo-- > 0) { 261 unsigned int id; 262 struct sfxge_rx_sw_desc *rx_desc; 263 bus_dma_segment_t seg; 264 struct mbuf *m; 265 266 id = (rxq->added + batch) & rxq->ptr_mask; 267 rx_desc = &rxq->queue[id]; 268 KASSERT(rx_desc->mbuf == NULL, ("rx_desc->mbuf != NULL")); 269 270 rx_desc->flags = EFX_DISCARD; 271 m = rx_desc->mbuf = sfxge_rx_alloc_mbuf(sc); 272 if (m == NULL) 273 break; 274 sfxge_map_mbuf_fast(rxq->mem.esm_tag, rxq->mem.esm_map, m, &seg); 275 addr[batch++] = seg.ds_addr; 276 277 if (batch == SFXGE_REFILL_BATCH) { 278 efx_rx_qpost(rxq->common, addr, mblksize, batch, 279 rxq->completed, rxq->added); 280 rxq->added += batch; 281 batch = 0; 282 } 283 } 284 285 if (ntodo != 0) 286 sfxge_rx_schedule_refill(rxq, retrying); 287 288 if (batch != 0) { 289 efx_rx_qpost(rxq->common, addr, mblksize, batch, 290 rxq->completed, rxq->added); 291 rxq->added += batch; 292 } 293 294 /* Make the descriptors visible to the hardware */ 295 bus_dmamap_sync(rxq->mem.esm_tag, rxq->mem.esm_map, 296 BUS_DMASYNC_PREWRITE); 297 298 efx_rx_qpush(rxq->common, rxq->added); 299} 300 301void 302sfxge_rx_qrefill(struct sfxge_rxq *rxq) 303{ 304 305 if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED)) 306 return; 307 308 /* Make sure the queue is full */ 309 sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_TRUE); 310} 311 312static void __sfxge_rx_deliver(struct sfxge_softc *sc, struct mbuf *m) 313{ 314 struct ifnet *ifp = sc->ifnet; 315 316 m->m_pkthdr.rcvif = ifp; 317 m->m_pkthdr.csum_data = 

static void __sfxge_rx_deliver(struct sfxge_softc *sc, struct mbuf *m)
{
	struct ifnet *ifp = sc->ifnet;

	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.csum_data = 0xffff;
	ifp->if_input(ifp, m);
}

static void
sfxge_rx_deliver(struct sfxge_softc *sc, struct sfxge_rx_sw_desc *rx_desc)
{
	struct mbuf *m = rx_desc->mbuf;
	int csum_flags;

	/* Convert checksum flags */
	csum_flags = (rx_desc->flags & EFX_CKSUM_IPV4) ?
	    (CSUM_IP_CHECKED | CSUM_IP_VALID) : 0;
	if (rx_desc->flags & EFX_CKSUM_TCPUDP)
		csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;

	/* The hash covers a 4-tuple for TCP only */
	if (rx_desc->flags & EFX_PKT_TCP) {
		m->m_pkthdr.flowid = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ,
		    mtod(m, uint8_t *));
		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
	}
	m->m_data += sc->rx_prefix_size;
	m->m_len = rx_desc->size - sc->rx_prefix_size;
	m->m_pkthdr.len = m->m_len;
	m->m_pkthdr.csum_flags = csum_flags;
	__sfxge_rx_deliver(sc, rx_desc->mbuf);

	rx_desc->flags = EFX_DISCARD;
	rx_desc->mbuf = NULL;
}

#ifdef SFXGE_LRO

static void
sfxge_lro_deliver(struct sfxge_lro_state *st, struct sfxge_lro_conn *c)
{
	struct sfxge_softc *sc = st->sc;
	struct mbuf *m = c->mbuf;
	struct tcphdr *c_th;
	int csum_flags;

	KASSERT(m, ("no mbuf to deliver"));

	++st->n_bursts;

	/* Finish off packet munging and recalculate IP header checksum. */
	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = c->nh;
		iph->ip_len = htons(iph->ip_len);
		iph->ip_sum = 0;
		iph->ip_sum = in_cksum_hdr(iph);
		c_th = (struct tcphdr *)(iph + 1);
		csum_flags = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
		    CSUM_IP_CHECKED | CSUM_IP_VALID);
	} else {
		struct ip6_hdr *iph = c->nh;
		iph->ip6_plen = htons(iph->ip6_plen);
		c_th = (struct tcphdr *)(iph + 1);
		csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
	}

	c_th->th_win = c->th_last->th_win;
	c_th->th_ack = c->th_last->th_ack;
	if (c_th->th_off == c->th_last->th_off) {
		/* Copy TCP options (take care to avoid going negative). */
		int optlen = ((c_th->th_off - 5) & 0xf) << 2u;
		memcpy(c_th + 1, c->th_last + 1, optlen);
	}

	m->m_pkthdr.flowid = c->conn_hash;
	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);

	m->m_pkthdr.csum_flags = csum_flags;
	__sfxge_rx_deliver(sc, m);

	c->mbuf = NULL;
	c->delivered = 1;
}

/* Drop the given connection, and add it to the free list. */
static void sfxge_lro_drop(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
{
	unsigned bucket;

	KASSERT(!c->mbuf, ("found orphaned mbuf"));

	if (c->next_buf.mbuf != NULL) {
		sfxge_rx_deliver(rxq->sc, &c->next_buf);
		LIST_REMOVE(c, active_link);
	}

	bucket = c->conn_hash & rxq->lro.conns_mask;
	KASSERT(rxq->lro.conns_n[bucket] > 0, ("LRO: bucket fill level wrong"));
	--rxq->lro.conns_n[bucket];
	TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
	TAILQ_INSERT_HEAD(&rxq->lro.free_conns, c, link);
}

/* Stop tracking connections that have gone idle in order to keep hash
 * chains short.
 */
static void sfxge_lro_purge_idle(struct sfxge_rxq *rxq, unsigned now)
{
	struct sfxge_lro_conn *c;
	unsigned i;

	KASSERT(LIST_EMPTY(&rxq->lro.active_conns),
	    ("found active connections"));

	rxq->lro.last_purge_ticks = now;
	for (i = 0; i <= rxq->lro.conns_mask; ++i) {
		if (TAILQ_EMPTY(&rxq->lro.conns[i]))
			continue;

		c = TAILQ_LAST(&rxq->lro.conns[i], sfxge_lro_tailq);
		if (now - c->last_pkt_ticks > lro_idle_ticks) {
			++rxq->lro.n_drop_idle;
			sfxge_lro_drop(rxq, c);
		}
	}
}

static void
sfxge_lro_merge(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
		struct mbuf *mbuf, struct tcphdr *th)
{
	struct tcphdr *c_th;

	/* Tack the new mbuf onto the chain. */
	KASSERT(!mbuf->m_next, ("mbuf already chained"));
	c->mbuf_tail->m_next = mbuf;
	c->mbuf_tail = mbuf;

	/* Increase length appropriately */
	c->mbuf->m_pkthdr.len += mbuf->m_len;

	/* Update the connection state flags */
	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = c->nh;
		iph->ip_len += mbuf->m_len;
		c_th = (struct tcphdr *)(iph + 1);
	} else {
		struct ip6_hdr *iph = c->nh;
		iph->ip6_plen += mbuf->m_len;
		c_th = (struct tcphdr *)(iph + 1);
	}
	c_th->th_flags |= (th->th_flags & TH_PUSH);
	c->th_last = th;
	++st->n_merges;

	/* Pass packet up now if another segment could overflow the IP
	 * length.
	 */
	if (c->mbuf->m_pkthdr.len > 65536 - 9200)
		sfxge_lro_deliver(st, c);
}

static void
sfxge_lro_start(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
		struct mbuf *mbuf, void *nh, struct tcphdr *th)
{
	/* Start the chain */
	c->mbuf = mbuf;
	c->mbuf_tail = c->mbuf;
	c->nh = nh;
	c->th_last = th;

	mbuf->m_pkthdr.len = mbuf->m_len;

	/* Mangle header fields for later processing */
	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = nh;
		iph->ip_len = ntohs(iph->ip_len);
	} else {
		struct ip6_hdr *iph = nh;
		iph->ip6_plen = ntohs(iph->ip6_plen);
	}
}

/* Try to merge or otherwise hold or deliver (as appropriate) the
 * packet buffered for this connection (c->next_buf).  Return a flag
 * indicating whether the connection is still active for LRO purposes.
 */
static int
sfxge_lro_try_merge(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
{
	struct sfxge_rx_sw_desc *rx_buf = &c->next_buf;
	char *eh = c->next_eh;
	int data_length, hdr_length, dont_merge;
	unsigned th_seq, pkt_length;
	struct tcphdr *th;
	unsigned now;

	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = c->next_nh;
		th = (struct tcphdr *)(iph + 1);
		pkt_length = ntohs(iph->ip_len) + (char *) iph - eh;
	} else {
		struct ip6_hdr *iph = c->next_nh;
		th = (struct tcphdr *)(iph + 1);
		pkt_length = ntohs(iph->ip6_plen) + (char *) th - eh;
	}

	hdr_length = (char *) th + th->th_off * 4 - eh;
	data_length = (min(pkt_length, rx_buf->size - rxq->sc->rx_prefix_size) -
	    hdr_length);
	th_seq = ntohl(th->th_seq);
	dont_merge = ((data_length <= 0)
	    | (th->th_flags & (TH_URG | TH_SYN | TH_RST | TH_FIN)));

	/* Check for options other than aligned timestamp. */
	if (th->th_off != 5) {
		const uint32_t *opt_ptr = (const uint32_t *) (th + 1);
		if (th->th_off == 8 &&
		    opt_ptr[0] == ntohl((TCPOPT_NOP << 24) |
					(TCPOPT_NOP << 16) |
					(TCPOPT_TIMESTAMP << 8) |
					TCPOLEN_TIMESTAMP)) {
			/* timestamp option -- okay */
		} else {
			dont_merge = 1;
		}
	}

	if (__predict_false(th_seq != c->next_seq)) {
		/* Out-of-order, so start counting again. */
		if (c->mbuf != NULL)
			sfxge_lro_deliver(&rxq->lro, c);
		c->n_in_order_pkts -= lro_loss_packets;
		c->next_seq = th_seq + data_length;
		++rxq->lro.n_misorder;
		goto deliver_buf_out;
	}
	c->next_seq = th_seq + data_length;

	now = ticks;
	if (now - c->last_pkt_ticks > lro_idle_ticks) {
		++rxq->lro.n_drop_idle;
		if (c->mbuf != NULL)
			sfxge_lro_deliver(&rxq->lro, c);
		sfxge_lro_drop(rxq, c);
		return (0);
	}
	c->last_pkt_ticks = ticks;

	if (c->n_in_order_pkts < lro_slow_start_packets) {
		/* May be in slow-start, so don't merge. */
		++rxq->lro.n_slow_start;
		++c->n_in_order_pkts;
		goto deliver_buf_out;
	}

	if (__predict_false(dont_merge)) {
		if (c->mbuf != NULL)
			sfxge_lro_deliver(&rxq->lro, c);
		if (th->th_flags & (TH_FIN | TH_RST)) {
			++rxq->lro.n_drop_closed;
			sfxge_lro_drop(rxq, c);
			return (0);
		}
		goto deliver_buf_out;
	}

	rx_buf->mbuf->m_data += rxq->sc->rx_prefix_size;

	if (__predict_true(c->mbuf != NULL)) {
		/* Remove headers and any padding */
		rx_buf->mbuf->m_data += hdr_length;
		rx_buf->mbuf->m_len = data_length;

		sfxge_lro_merge(&rxq->lro, c, rx_buf->mbuf, th);
	} else {
		/* Remove any padding */
		rx_buf->mbuf->m_len = pkt_length;

		sfxge_lro_start(&rxq->lro, c, rx_buf->mbuf, c->next_nh, th);
	}

	rx_buf->mbuf = NULL;
	return (1);

 deliver_buf_out:
	sfxge_rx_deliver(rxq->sc, rx_buf);
	return (1);
}

static void sfxge_lro_new_conn(struct sfxge_lro_state *st, uint32_t conn_hash,
			       uint16_t l2_id, void *nh, struct tcphdr *th)
{
	unsigned bucket = conn_hash & st->conns_mask;
	struct sfxge_lro_conn *c;

	if (st->conns_n[bucket] >= lro_chain_max) {
		++st->n_too_many;
		return;
	}

	if (!TAILQ_EMPTY(&st->free_conns)) {
		c = TAILQ_FIRST(&st->free_conns);
		TAILQ_REMOVE(&st->free_conns, c, link);
	} else {
		c = malloc(sizeof(*c), M_SFXGE, M_NOWAIT);
		if (c == NULL)
			return;
		c->mbuf = NULL;
		c->next_buf.mbuf = NULL;
	}

	/* Create the connection tracking data */
	++st->conns_n[bucket];
	TAILQ_INSERT_HEAD(&st->conns[bucket], c, link);
	c->l2_id = l2_id;
	c->conn_hash = conn_hash;
	c->source = th->th_sport;
	c->dest = th->th_dport;
	c->n_in_order_pkts = 0;
	c->last_pkt_ticks = *(volatile int *)&ticks;
	c->delivered = 0;
	++st->n_new_stream;
	/* NB. We don't initialise c->next_seq, and it doesn't matter what
	 * value it has.  Most likely the next packet received for this
	 * connection will not match -- no harm done.
	 */
}
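
/*
 * Connection lookup in sfxge_lro() below matches on the hardware hash bucket,
 * the l2_id (VLAN tag plus IPv4/IPv6 flag), the TCP ports and, when a merge
 * chain is already in progress, the IP addresses.  The comparisons use the
 * same branch-free subtract-and-OR idiom as ipv6_addr_cmp(): the result is
 * zero if and only if every field matches.
 */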

/* Process mbuf and decide whether to dispatch it to the stack now or
 * later.
 */
static void
sfxge_lro(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_buf)
{
	struct sfxge_softc *sc = rxq->sc;
	struct mbuf *m = rx_buf->mbuf;
	struct ether_header *eh;
	struct sfxge_lro_conn *c;
	uint16_t l2_id;
	uint16_t l3_proto;
	void *nh;
	struct tcphdr *th;
	uint32_t conn_hash;
	unsigned bucket;

	/* Get the hardware hash */
	conn_hash = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ,
	    mtod(m, uint8_t *));

	eh = (struct ether_header *)(m->m_data + sc->rx_prefix_size);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		struct ether_vlan_header *veh = (struct ether_vlan_header *)eh;
		l2_id = EVL_VLANOFTAG(ntohs(veh->evl_tag)) |
		    SFXGE_LRO_L2_ID_VLAN;
		l3_proto = veh->evl_proto;
		nh = veh + 1;
	} else {
		l2_id = 0;
		l3_proto = eh->ether_type;
		nh = eh + 1;
	}

	/* Check whether this is a suitable packet (unfragmented
	 * TCP/IPv4 or TCP/IPv6).  If so, find the TCP header and
	 * length, and compute a hash if necessary.  If not, return.
	 */
	if (l3_proto == htons(ETHERTYPE_IP)) {
		struct ip *iph = nh;
		if ((iph->ip_p - IPPROTO_TCP) |
		    (iph->ip_hl - (sizeof(*iph) >> 2u)) |
		    (iph->ip_off & htons(IP_MF | IP_OFFMASK)))
			goto deliver_now;
		th = (struct tcphdr *)(iph + 1);
	} else if (l3_proto == htons(ETHERTYPE_IPV6)) {
		struct ip6_hdr *iph = nh;
		if (iph->ip6_nxt != IPPROTO_TCP)
			goto deliver_now;
		l2_id |= SFXGE_LRO_L2_ID_IPV6;
		th = (struct tcphdr *)(iph + 1);
	} else {
		goto deliver_now;
	}

	bucket = conn_hash & rxq->lro.conns_mask;

	TAILQ_FOREACH(c, &rxq->lro.conns[bucket], link) {
		if ((c->l2_id - l2_id) | (c->conn_hash - conn_hash))
			continue;
		if ((c->source - th->th_sport) | (c->dest - th->th_dport))
			continue;
		if (c->mbuf != NULL) {
			if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
				struct ip *c_iph, *iph = nh;
				c_iph = c->nh;
				if ((c_iph->ip_src.s_addr - iph->ip_src.s_addr) |
				    (c_iph->ip_dst.s_addr - iph->ip_dst.s_addr))
					continue;
			} else {
				struct ip6_hdr *c_iph, *iph = nh;
				c_iph = c->nh;
				if (ipv6_addr_cmp(&c_iph->ip6_src, &iph->ip6_src) |
				    ipv6_addr_cmp(&c_iph->ip6_dst, &iph->ip6_dst))
					continue;
			}
		}

		/* Re-insert at head of list to reduce lookup time. */
		TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
		TAILQ_INSERT_HEAD(&rxq->lro.conns[bucket], c, link);

		if (c->next_buf.mbuf != NULL) {
			if (!sfxge_lro_try_merge(rxq, c))
				goto deliver_now;
		} else {
			LIST_INSERT_HEAD(&rxq->lro.active_conns, c,
			    active_link);
		}
		c->next_buf = *rx_buf;
		c->next_eh = eh;
		c->next_nh = nh;

		rx_buf->mbuf = NULL;
		rx_buf->flags = EFX_DISCARD;
		return;
	}

	sfxge_lro_new_conn(&rxq->lro, conn_hash, l2_id, nh, th);
 deliver_now:
	sfxge_rx_deliver(sc, rx_buf);
}

static void sfxge_lro_end_of_burst(struct sfxge_rxq *rxq)
{
	struct sfxge_lro_state *st = &rxq->lro;
	struct sfxge_lro_conn *c;
	unsigned t;

	while (!LIST_EMPTY(&st->active_conns)) {
		c = LIST_FIRST(&st->active_conns);
		if (!c->delivered && c->mbuf != NULL)
			sfxge_lro_deliver(st, c);
		if (sfxge_lro_try_merge(rxq, c)) {
			if (c->mbuf != NULL)
				sfxge_lro_deliver(st, c);
			LIST_REMOVE(c, active_link);
		}
		c->delivered = 0;
	}

	t = *(volatile int *)&ticks;
	if (__predict_false(t != st->last_purge_ticks))
		sfxge_lro_purge_idle(rxq, t);
}

#else	/* !SFXGE_LRO */

static void
sfxge_lro(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_buf)
{
}

static void
sfxge_lro_end_of_burst(struct sfxge_rxq *rxq)
{
}

#endif	/* SFXGE_LRO */

void
sfxge_rx_qcomplete(struct sfxge_rxq *rxq, boolean_t eop)
{
	struct sfxge_softc *sc = rxq->sc;
	int lro_enabled = sc->ifnet->if_capenable & IFCAP_LRO;
	unsigned int index;
	struct sfxge_evq *evq;
	unsigned int completed;
	unsigned int level;
	struct mbuf *m;
	struct sfxge_rx_sw_desc *prev = NULL;

	index = rxq->index;
	evq = sc->evq[index];

	SFXGE_EVQ_LOCK_ASSERT_OWNED(evq);

	completed = rxq->completed;
	while (completed != rxq->pending) {
		unsigned int id;
		struct sfxge_rx_sw_desc *rx_desc;

		id = completed++ & rxq->ptr_mask;
		rx_desc = &rxq->queue[id];
		m = rx_desc->mbuf;

		if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED))
			goto discard;

		if (rx_desc->flags & (EFX_ADDR_MISMATCH | EFX_DISCARD))
			goto discard;

		prefetch_read_many(mtod(m, caddr_t));

		/* Check for loopback packets */
		if (!(rx_desc->flags & EFX_PKT_IPV4) &&
		    !(rx_desc->flags & EFX_PKT_IPV6)) {
			struct ether_header *etherhp;

			/*LINTED*/
			etherhp = mtod(m, struct ether_header *);

			if (etherhp->ether_type ==
			    htons(SFXGE_ETHERTYPE_LOOPBACK)) {
				EFSYS_PROBE(loopback);

				rxq->loopback++;
				goto discard;
			}
		}

		/* Pass packet up the stack or into LRO (pipelined) */
		if (prev != NULL) {
			if (lro_enabled)
				sfxge_lro(rxq, prev);
			else
				sfxge_rx_deliver(sc, prev);
		}
		prev = rx_desc;
		continue;

discard:
		/* Return the packet to the pool */
		m_free(m);
		rx_desc->mbuf = NULL;
	}
	rxq->completed = completed;

	level = rxq->added - rxq->completed;

	/* Pass last packet up the stack or into LRO */
	if (prev != NULL) {
		if (lro_enabled)
			sfxge_lro(rxq, prev);
		else
			sfxge_rx_deliver(sc, prev);
	}

	/*
	 * If there are any pending flows and this is the end of the
	 * poll then they must be completed.
	 */
	if (eop)
		sfxge_lro_end_of_burst(rxq);

	/* Top up the queue if necessary */
	if (level < rxq->refill_threshold)
		sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_FALSE);
}

static void
sfxge_rx_qstop(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;
	struct sfxge_evq *evq;
	unsigned int count;

	rxq = sc->rxq[index];
	evq = sc->evq[index];

	SFXGE_EVQ_LOCK(evq);

	KASSERT(rxq->init_state == SFXGE_RXQ_STARTED,
	    ("rxq not started"));

	rxq->init_state = SFXGE_RXQ_INITIALIZED;

	callout_stop(&rxq->refill_callout);

again:
	rxq->flush_state = SFXGE_FLUSH_PENDING;

	/* Flush the receive queue */
	efx_rx_qflush(rxq->common);

	SFXGE_EVQ_UNLOCK(evq);

	count = 0;
	do {
		/* Spin for 100 ms */
		DELAY(100000);

		if (rxq->flush_state != SFXGE_FLUSH_PENDING)
			break;

	} while (++count < 20);

	SFXGE_EVQ_LOCK(evq);

	if (rxq->flush_state == SFXGE_FLUSH_FAILED)
		goto again;

	rxq->flush_state = SFXGE_FLUSH_DONE;

	rxq->pending = rxq->added;
	sfxge_rx_qcomplete(rxq, B_TRUE);

	KASSERT(rxq->completed == rxq->pending,
	    ("rxq->completed != rxq->pending"));

	rxq->added = 0;
	rxq->pending = 0;
	rxq->completed = 0;
	rxq->loopback = 0;

	/* Destroy the common code receive queue. */
	efx_rx_qdestroy(rxq->common);

	efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
	    EFX_RXQ_NBUFS(sc->rxq_entries));

	SFXGE_EVQ_UNLOCK(evq);
}

static int
sfxge_rx_qstart(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;
	efsys_mem_t *esmp;
	struct sfxge_evq *evq;
	int rc;

	rxq = sc->rxq[index];
	esmp = &rxq->mem;
	evq = sc->evq[index];

	KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
	    ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));
	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
	    ("evq->init_state != SFXGE_EVQ_STARTED"));

	/* Program the buffer table. */
	if ((rc = efx_sram_buf_tbl_set(sc->enp, rxq->buf_base_id, esmp,
	    EFX_RXQ_NBUFS(sc->rxq_entries))) != 0)
		return (rc);

	/* Create the common code receive queue. */
	if ((rc = efx_rx_qcreate(sc->enp, index, index, EFX_RXQ_TYPE_DEFAULT,
	    esmp, sc->rxq_entries, rxq->buf_base_id, evq->common,
	    &rxq->common)) != 0)
		goto fail;

	SFXGE_EVQ_LOCK(evq);

	/* Enable the receive queue. */
	efx_rx_qenable(rxq->common);

	rxq->init_state = SFXGE_RXQ_STARTED;

	/* Try to fill the queue from the pool. */
	sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(sc->rxq_entries), B_FALSE);

	SFXGE_EVQ_UNLOCK(evq);

	return (0);

fail:
	efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
	    EFX_RXQ_NBUFS(sc->rxq_entries));
	return (rc);
}

void
sfxge_rx_stop(struct sfxge_softc *sc)
{
	int index;

	/* Stop the receive queue(s) */
	index = sc->rxq_count;
	while (--index >= 0)
		sfxge_rx_qstop(sc, index);

	sc->rx_prefix_size = 0;
	sc->rx_buffer_size = 0;

	efx_rx_fini(sc->enp);
}
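
/*
 * Bring up the receive path: initialise the common-code RX module, size the
 * packet buffers from the MTU and pick a matching mbuf zone, program the RSS
 * indirection table (round-robin across the RX queues) and the Toeplitz key,
 * then start each receive queue in turn.
 */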
int
sfxge_rx_start(struct sfxge_softc *sc)
{
	struct sfxge_intr *intr;
	int index;
	int rc;

	intr = &sc->intr;

	/* Initialize the common code receive module. */
	if ((rc = efx_rx_init(sc->enp)) != 0)
		return (rc);

	/* Calculate the receive packet buffer size. */
	sc->rx_prefix_size = EFX_RX_PREFIX_SIZE;
	sc->rx_buffer_size = (EFX_MAC_PDU(sc->ifnet->if_mtu) +
			      sc->rx_prefix_size);

	/* Select zone for packet buffers */
	if (sc->rx_buffer_size <= MCLBYTES)
		sc->rx_buffer_zone = zone_clust;
	else if (sc->rx_buffer_size <= MJUMPAGESIZE)
		sc->rx_buffer_zone = zone_jumbop;
	else if (sc->rx_buffer_size <= MJUM9BYTES)
		sc->rx_buffer_zone = zone_jumbo9;
	else
		sc->rx_buffer_zone = zone_jumbo16;

	/*
	 * Set up the scale table.  Enable all hash types and hash insertion.
	 */
	for (index = 0; index < SFXGE_RX_SCALE_MAX; index++)
		sc->rx_indir_table[index] = index % sc->rxq_count;
	if ((rc = efx_rx_scale_tbl_set(sc->enp, sc->rx_indir_table,
	    SFXGE_RX_SCALE_MAX)) != 0)
		goto fail;
	(void)efx_rx_scale_mode_set(sc->enp, EFX_RX_HASHALG_TOEPLITZ,
	    (1 << EFX_RX_HASH_IPV4) | (1 << EFX_RX_HASH_TCPIPV4) |
	    (1 << EFX_RX_HASH_IPV6) | (1 << EFX_RX_HASH_TCPIPV6), B_TRUE);

	if ((rc = efx_rx_scale_toeplitz_ipv4_key_set(sc->enp, toep_key,
	    sizeof(toep_key))) != 0)
		goto fail;

	/* Start the receive queue(s). */
	for (index = 0; index < sc->rxq_count; index++) {
		if ((rc = sfxge_rx_qstart(sc, index)) != 0)
			goto fail2;
	}

	return (0);

fail2:
	while (--index >= 0)
		sfxge_rx_qstop(sc, index);

fail:
	efx_rx_fini(sc->enp);

	return (rc);
}

#ifdef SFXGE_LRO

static void sfxge_lro_init(struct sfxge_rxq *rxq)
{
	struct sfxge_lro_state *st = &rxq->lro;
	unsigned i;

	st->conns_mask = lro_table_size - 1;
	KASSERT(!((st->conns_mask + 1) & st->conns_mask),
	    ("lro_table_size must be a power of 2"));
	st->sc = rxq->sc;
	st->conns = malloc((st->conns_mask + 1) * sizeof(st->conns[0]),
			   M_SFXGE, M_WAITOK);
	st->conns_n = malloc((st->conns_mask + 1) * sizeof(st->conns_n[0]),
			     M_SFXGE, M_WAITOK);
	for (i = 0; i <= st->conns_mask; ++i) {
		TAILQ_INIT(&st->conns[i]);
		st->conns_n[i] = 0;
	}
	LIST_INIT(&st->active_conns);
	TAILQ_INIT(&st->free_conns);
}

static void sfxge_lro_fini(struct sfxge_rxq *rxq)
{
	struct sfxge_lro_state *st = &rxq->lro;
	struct sfxge_lro_conn *c;
	unsigned i;

	/* Return cleanly if sfxge_lro_init() has not been called. */
	if (st->conns == NULL)
		return;

	KASSERT(LIST_EMPTY(&st->active_conns), ("found active connections"));

	for (i = 0; i <= st->conns_mask; ++i) {
		while (!TAILQ_EMPTY(&st->conns[i])) {
			c = TAILQ_LAST(&st->conns[i], sfxge_lro_tailq);
			sfxge_lro_drop(rxq, c);
		}
	}

	while (!TAILQ_EMPTY(&st->free_conns)) {
		c = TAILQ_FIRST(&st->free_conns);
		TAILQ_REMOVE(&st->free_conns, c, link);
		KASSERT(!c->mbuf, ("found orphaned mbuf"));
		free(c, M_SFXGE);
	}

	free(st->conns_n, M_SFXGE);
	free(st->conns, M_SFXGE);
	st->conns = NULL;
}

#else

static void
sfxge_lro_init(struct sfxge_rxq *rxq)
{
}

static void
sfxge_lro_fini(struct sfxge_rxq *rxq)
{
}

#endif	/* SFXGE_LRO */

static void
sfxge_rx_qfini(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;

	rxq = sc->rxq[index];

	KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
	    ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));

	/* Free the context array and the flow table. */
	free(rxq->queue, M_SFXGE);
	sfxge_lro_fini(rxq);

	/* Release DMA memory. */
	sfxge_dma_free(&rxq->mem);

	sc->rxq[index] = NULL;

	free(rxq, M_SFXGE);
}

static int
sfxge_rx_qinit(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;
	struct sfxge_evq *evq;
	efsys_mem_t *esmp;
	int rc;

	KASSERT(index < sc->rxq_count, ("index >= %d", sc->rxq_count));

	rxq = malloc(sizeof(struct sfxge_rxq), M_SFXGE, M_ZERO | M_WAITOK);
	rxq->sc = sc;
	rxq->index = index;
	rxq->entries = sc->rxq_entries;
	rxq->ptr_mask = rxq->entries - 1;
	rxq->refill_threshold = RX_REFILL_THRESHOLD(rxq->entries);

	sc->rxq[index] = rxq;
	esmp = &rxq->mem;

	evq = sc->evq[index];

	/* Allocate and zero DMA space. */
	if ((rc = sfxge_dma_alloc(sc, EFX_RXQ_SIZE(sc->rxq_entries), esmp)) != 0)
		return (rc);

	/* Allocate buffer table entries. */
	sfxge_sram_buf_tbl_alloc(sc, EFX_RXQ_NBUFS(sc->rxq_entries),
				 &rxq->buf_base_id);

	/* Allocate the context array and the flow table. */
	rxq->queue = malloc(sizeof(struct sfxge_rx_sw_desc) * sc->rxq_entries,
			    M_SFXGE, M_WAITOK | M_ZERO);
	sfxge_lro_init(rxq);

	callout_init(&rxq->refill_callout, B_TRUE);

	rxq->init_state = SFXGE_RXQ_INITIALIZED;

	return (0);
}

static const struct {
	const char *name;
	size_t offset;
} sfxge_rx_stats[] = {
#define	SFXGE_RX_STAT(name, member) \
	{ #name, offsetof(struct sfxge_rxq, member) }
#ifdef SFXGE_LRO
	SFXGE_RX_STAT(lro_merges, lro.n_merges),
	SFXGE_RX_STAT(lro_bursts, lro.n_bursts),
	SFXGE_RX_STAT(lro_slow_start, lro.n_slow_start),
	SFXGE_RX_STAT(lro_misorder, lro.n_misorder),
	SFXGE_RX_STAT(lro_too_many, lro.n_too_many),
	SFXGE_RX_STAT(lro_new_stream, lro.n_new_stream),
	SFXGE_RX_STAT(lro_drop_idle, lro.n_drop_idle),
	SFXGE_RX_STAT(lro_drop_closed, lro.n_drop_closed)
#endif
};

static int
sfxge_rx_stat_handler(SYSCTL_HANDLER_ARGS)
{
	struct sfxge_softc *sc = arg1;
	unsigned int id = arg2;
	unsigned int sum, index;

	/* Sum across all RX queues */
	sum = 0;
	for (index = 0; index < sc->rxq_count; index++)
		sum += *(unsigned int *)((caddr_t)sc->rxq[index] +
					 sfxge_rx_stats[id].offset);

	return (SYSCTL_OUT(req, &sum, sizeof(sum)));
}

static void
sfxge_rx_stat_init(struct sfxge_softc *sc)
{
	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
	struct sysctl_oid_list *stat_list;
	unsigned int id;

	stat_list = SYSCTL_CHILDREN(sc->stats_node);

	for (id = 0; id < nitems(sfxge_rx_stats); id++) {
		SYSCTL_ADD_PROC(
			ctx, stat_list,
			OID_AUTO, sfxge_rx_stats[id].name,
			CTLTYPE_UINT|CTLFLAG_RD,
			sc, id, sfxge_rx_stat_handler, "IU",
			"");
	}
}

void
sfxge_rx_fini(struct sfxge_softc *sc)
{
	int index;

	index = sc->rxq_count;
	while (--index >= 0)
		sfxge_rx_qfini(sc, index);

	sc->rxq_count = 0;
}
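
/*
 * Per-adapter RX initialisation: validate the LRO table size, pick a default
 * idle timeout of roughly 100 ms if none was configured, allocate one receive
 * queue per interrupt and register the RX statistics sysctls.
 */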
int
sfxge_rx_init(struct sfxge_softc *sc)
{
	struct sfxge_intr *intr;
	int index;
	int rc;

#ifdef SFXGE_LRO
	if (!ISP2(lro_table_size)) {
		log(LOG_ERR, "%s=%u must be power of 2",
		    SFXGE_LRO_PARAM(table_size), lro_table_size);
		rc = EINVAL;
		goto fail_lro_table_size;
	}

	if (lro_idle_ticks == 0)
		lro_idle_ticks = hz / 10 + 1;		/* 100 ms */
#endif

	intr = &sc->intr;

	sc->rxq_count = intr->n_alloc;

	KASSERT(intr->state == SFXGE_INTR_INITIALIZED,
	    ("intr->state != SFXGE_INTR_INITIALIZED"));

	/* Initialize the receive queue(s) - one per interrupt. */
	for (index = 0; index < sc->rxq_count; index++) {
		if ((rc = sfxge_rx_qinit(sc, index)) != 0)
			goto fail;
	}

	sfxge_rx_stat_init(sc);

	return (0);

fail:
	/* Tear down the receive queue(s). */
	while (--index >= 0)
		sfxge_rx_qfini(sc, index);

	sc->rxq_count = 0;

#ifdef SFXGE_LRO
fail_lro_table_size:
#endif
	return (rc);
}
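
/*
 * Tuning note: the LRO parameters declared near the top of this file are
 * loader tunables and read-only sysctls.  Assuming SFXGE_PARAM() in sfxge.h
 * expands to the usual "hw.sfxge." prefix, they can be set from
 * /boot/loader.conf, for example:
 *
 *	hw.sfxge.lro.table_size="256"	# default 128, must be a power of 2
 *	hw.sfxge.lro.chain_max="40"	# default 20
 *
 * The sysctl names follow from the SYSCTL_NODE(_hw_sfxge, OID_AUTO, lro, ...)
 * declaration earlier in this file.
 */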