/*-
 * Copyright (c) 2010-2011 Solarflare Communications, Inc.
 * All rights reserved.
 *
 * This software was developed in part by Philip Paeps under contract for
 * Solarflare Communications, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
281590Srgrimes */ 291590Srgrimes 301590Srgrimes#include <sys/cdefs.h> 311590Srgrimes__FBSDID("$FreeBSD: stable/10/sys/dev/sfxge/sfxge_rx.c 283206 2015-05-21 09:05:13Z arybchik $"); 321590Srgrimes 331590Srgrimes#include <sys/types.h> 341590Srgrimes#include <sys/mbuf.h> 351590Srgrimes#include <sys/smp.h> 361590Srgrimes#include <sys/socket.h> 371590Srgrimes#include <sys/sysctl.h> 381590Srgrimes#include <sys/limits.h> 391590Srgrimes#include <sys/syslog.h> 401590Srgrimes 411590Srgrimes#include <net/ethernet.h> 421590Srgrimes#include <net/if.h> 431590Srgrimes#include <net/if_vlan_var.h> 441590Srgrimes 451590Srgrimes#include <netinet/in.h> 461590Srgrimes#include <netinet/ip.h> 471590Srgrimes#include <netinet/ip6.h> 481590Srgrimes#include <netinet/tcp.h> 491590Srgrimes 501590Srgrimes#include <machine/in_cksum.h> 511590Srgrimes 521590Srgrimes#include "common/efx.h" 531590Srgrimes 541590Srgrimes 551590Srgrimes#include "sfxge.h" 561590Srgrimes#include "sfxge_rx.h" 577726Sdg 581590Srgrimes#define RX_REFILL_THRESHOLD(_entries) (EFX_RXQ_LIMIT(_entries) * 9 / 10) 591590Srgrimes 601590Srgrimes#ifdef SFXGE_LRO 611590Srgrimes 621590SrgrimesSYSCTL_NODE(_hw_sfxge, OID_AUTO, lro, CTLFLAG_RD, NULL, 631590Srgrimes "Large receive offload (LRO) parameters"); 641590Srgrimes 651590Srgrimes#define SFXGE_LRO_PARAM(_param) SFXGE_PARAM(lro._param) 661590Srgrimes 671590Srgrimes/* Size of the LRO hash table. Must be a power of 2. A larger table 681590Srgrimes * means we can accelerate a larger number of streams. 691590Srgrimes */ 701590Srgrimesstatic unsigned lro_table_size = 128; 711590SrgrimesTUNABLE_INT(SFXGE_LRO_PARAM(table_size), &lro_table_size); 721590SrgrimesSYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, table_size, CTLFLAG_RDTUN, 731590Srgrimes &lro_table_size, 0, 741590Srgrimes "Size of the LRO hash table (must be a power of 2)"); 751590Srgrimes 761590Srgrimes/* Maximum length of a hash chain. If chains get too long then the lookup 771590Srgrimes * time increases and may exceed the benefit of LRO. 
781590Srgrimes */ 791590Srgrimesstatic unsigned lro_chain_max = 20; 801590SrgrimesTUNABLE_INT(SFXGE_LRO_PARAM(chain_max), &lro_chain_max); 811590SrgrimesSYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, chain_max, CTLFLAG_RDTUN, 821590Srgrimes &lro_chain_max, 0, 831590Srgrimes "The maximum length of a hash chain"); 841590Srgrimes 851590Srgrimes/* Maximum time (in ticks) that a connection can be idle before it's LRO 861590Srgrimes * state is discarded. 871590Srgrimes */ 881590Srgrimesstatic unsigned lro_idle_ticks; /* initialised in sfxge_rx_init() */ 891590SrgrimesTUNABLE_INT(SFXGE_LRO_PARAM(idle_ticks), &lro_idle_ticks); 901590SrgrimesSYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, idle_ticks, CTLFLAG_RDTUN, 911590Srgrimes &lro_idle_ticks, 0, 921590Srgrimes "The maximum time (in ticks) that a connection can be idle " 931590Srgrimes "before it's LRO state is discarded"); 941590Srgrimes 951590Srgrimes/* Number of packets with payload that must arrive in-order before a 961590Srgrimes * connection is eligible for LRO. The idea is we should avoid coalescing 971590Srgrimes * segments when the sender is in slow-start because reducing the ACK rate 981590Srgrimes * can damage performance. 991590Srgrimes */ 1001590Srgrimesstatic int lro_slow_start_packets = 2000; 1011590SrgrimesTUNABLE_INT(SFXGE_LRO_PARAM(slow_start_packets), &lro_slow_start_packets); 1021590SrgrimesSYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, slow_start_packets, CTLFLAG_RDTUN, 1031590Srgrimes &lro_slow_start_packets, 0, 1041590Srgrimes "Number of packets with payload that must arrive in-order before " 1051590Srgrimes "a connection is eligible for LRO"); 1061590Srgrimes 1071590Srgrimes/* Number of packets with payload that must arrive in-order following loss 1081590Srgrimes * before a connection is eligible for LRO. The idea is we should avoid 1091590Srgrimes * coalescing segments when the sender is recovering from loss, because 1101590Srgrimes * reducing the ACK rate can damage performance. 
1111590Srgrimes */ 1121590Srgrimesstatic int lro_loss_packets = 20; 1131590SrgrimesTUNABLE_INT(SFXGE_LRO_PARAM(loss_packets), &lro_loss_packets); 1141590SrgrimesSYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, loss_packets, CTLFLAG_RDTUN, 1151590Srgrimes &lro_loss_packets, 0, 1161590Srgrimes "Number of packets with payload that must arrive in-order " 1171590Srgrimes "following loss before a connection is eligible for LRO"); 1181590Srgrimes 1191590Srgrimes/* Flags for sfxge_lro_conn::l2_id; must not collide with EVL_VLID_MASK */ 1201590Srgrimes#define SFXGE_LRO_L2_ID_VLAN 0x4000 1211590Srgrimes#define SFXGE_LRO_L2_ID_IPV6 0x8000 1221590Srgrimes#define SFXGE_LRO_CONN_IS_VLAN_ENCAP(c) ((c)->l2_id & SFXGE_LRO_L2_ID_VLAN) 1231590Srgrimes#define SFXGE_LRO_CONN_IS_TCPIPV4(c) (!((c)->l2_id & SFXGE_LRO_L2_ID_IPV6)) 1241590Srgrimes 1251590Srgrimes/* Compare IPv6 addresses, avoiding conditional branches */ 1261590Srgrimesstatic unsigned long ipv6_addr_cmp(const struct in6_addr *left, 1271590Srgrimes const struct in6_addr *right) 1281590Srgrimes{ 1291590Srgrimes#if LONG_BIT == 64 1301590Srgrimes const uint64_t *left64 = (const uint64_t *)left; 1311590Srgrimes const uint64_t *right64 = (const uint64_t *)right; 1321590Srgrimes return (left64[0] - right64[0]) | (left64[1] - right64[1]); 1331590Srgrimes#else 1341590Srgrimes return (left->s6_addr32[0] - right->s6_addr32[0]) | 1351590Srgrimes (left->s6_addr32[1] - right->s6_addr32[1]) | 1361590Srgrimes (left->s6_addr32[2] - right->s6_addr32[2]) | 1378874Srgrimes (left->s6_addr32[3] - right->s6_addr32[3]); 1381590Srgrimes#endif 1391590Srgrimes} 1401590Srgrimes 1411590Srgrimes#endif /* SFXGE_LRO */ 1421590Srgrimes 1431590Srgrimesvoid 1441590Srgrimessfxge_rx_qflush_done(struct sfxge_rxq *rxq) 1451590Srgrimes{ 1461590Srgrimes 1471590Srgrimes rxq->flush_state = SFXGE_FLUSH_DONE; 1481590Srgrimes} 1491590Srgrimes 1501590Srgrimesvoid 1511590Srgrimessfxge_rx_qflush_failed(struct sfxge_rxq *rxq) 1521590Srgrimes{ 1531590Srgrimes 1541590Srgrimes 
rxq->flush_state = SFXGE_FLUSH_FAILED; 1551590Srgrimes} 1561590Srgrimes 1571590Srgrimesstatic uint8_t toep_key[] = { 1581590Srgrimes 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 1591590Srgrimes 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 1601590Srgrimes 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 1611590Srgrimes 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 1621590Srgrimes 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa 1631590Srgrimes}; 1641590Srgrimes 1651590Srgrimesstatic void 1661590Srgrimessfxge_rx_post_refill(void *arg) 1671590Srgrimes{ 1681590Srgrimes struct sfxge_rxq *rxq = arg; 1691590Srgrimes struct sfxge_softc *sc; 1701590Srgrimes unsigned int index; 1711590Srgrimes struct sfxge_evq *evq; 1721590Srgrimes uint16_t magic; 1731590Srgrimes 1741590Srgrimes sc = rxq->sc; 1751590Srgrimes index = rxq->index; 1761590Srgrimes evq = sc->evq[index]; 1771590Srgrimes 1781590Srgrimes magic = SFXGE_MAGIC_RX_QREFILL | index; 1791590Srgrimes 1801590Srgrimes /* This is guaranteed due to the start/stop order of rx and ev */ 1811590Srgrimes KASSERT(evq->init_state == SFXGE_EVQ_STARTED, 1821590Srgrimes ("evq not started")); 1831590Srgrimes KASSERT(rxq->init_state == SFXGE_RXQ_STARTED, 1841590Srgrimes ("rxq not started")); 1851590Srgrimes efx_ev_qpost(evq->common, magic); 1861590Srgrimes} 1871590Srgrimes 1881590Srgrimesstatic void 1891590Srgrimessfxge_rx_schedule_refill(struct sfxge_rxq *rxq, boolean_t retrying) 1901590Srgrimes{ 1911590Srgrimes /* Initially retry after 100 ms, but back off in case of 1921590Srgrimes * repeated failures as we probably have to wait for the 1931590Srgrimes * administrator to raise the pool limit. 
*/ 1941590Srgrimes if (retrying) 1951590Srgrimes rxq->refill_delay = min(rxq->refill_delay * 2, 10 * hz); 1961590Srgrimes else 1971590Srgrimes rxq->refill_delay = hz / 10; 1981590Srgrimes 1991590Srgrimes callout_reset_curcpu(&rxq->refill_callout, rxq->refill_delay, 2001590Srgrimes sfxge_rx_post_refill, rxq); 2011590Srgrimes} 2021590Srgrimes 2031590Srgrimesstatic struct mbuf *sfxge_rx_alloc_mbuf(struct sfxge_softc *sc) 2041590Srgrimes{ 2051590Srgrimes struct mb_args args; 2061590Srgrimes struct mbuf *m; 2071590Srgrimes 2081590Srgrimes /* Allocate mbuf structure */ 2091590Srgrimes args.flags = M_PKTHDR; 2101590Srgrimes args.type = MT_DATA; 2111590Srgrimes m = (struct mbuf *)uma_zalloc_arg(zone_mbuf, &args, M_NOWAIT); 2121590Srgrimes 2131590Srgrimes /* Allocate (and attach) packet buffer */ 2141590Srgrimes if (m != NULL && !uma_zalloc_arg(sc->rx_buffer_zone, m, M_NOWAIT)) { 2151590Srgrimes uma_zfree(zone_mbuf, m); 2161590Srgrimes m = NULL; 2171590Srgrimes } 2188874Srgrimes 2191590Srgrimes return (m); 2201590Srgrimes} 2211590Srgrimes 2221590Srgrimes#define SFXGE_REFILL_BATCH 64 2231590Srgrimes 2241590Srgrimesstatic void 2251590Srgrimessfxge_rx_qfill(struct sfxge_rxq *rxq, unsigned int target, boolean_t retrying) 2261590Srgrimes{ 2271590Srgrimes struct sfxge_softc *sc; 2281590Srgrimes unsigned int index; 2291590Srgrimes struct sfxge_evq *evq; 2301590Srgrimes unsigned int batch; 2311590Srgrimes unsigned int rxfill; 2321590Srgrimes unsigned int mblksize; 2331590Srgrimes int ntodo; 2341590Srgrimes efsys_dma_addr_t addr[SFXGE_REFILL_BATCH]; 2351590Srgrimes 2361590Srgrimes sc = rxq->sc; 2371590Srgrimes index = rxq->index; 2381590Srgrimes evq = sc->evq[index]; 2391590Srgrimes 2401590Srgrimes prefetch_read_many(sc->enp); 2411590Srgrimes prefetch_read_many(rxq->common); 2421590Srgrimes 2431590Srgrimes SFXGE_EVQ_LOCK_ASSERT_OWNED(evq); 2441590Srgrimes 2451590Srgrimes if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED)) 2461590Srgrimes return; 2471590Srgrimes 
2481590Srgrimes rxfill = rxq->added - rxq->completed; 2491590Srgrimes KASSERT(rxfill <= EFX_RXQ_LIMIT(rxq->entries), 2501590Srgrimes ("rxfill > EFX_RXQ_LIMIT(rxq->entries)")); 2511590Srgrimes ntodo = min(EFX_RXQ_LIMIT(rxq->entries) - rxfill, target); 2521590Srgrimes KASSERT(ntodo <= EFX_RXQ_LIMIT(rxq->entries), 2531590Srgrimes ("ntodo > EFX_RQX_LIMIT(rxq->entries)")); 2541590Srgrimes 2551590Srgrimes if (ntodo == 0) 2561590Srgrimes return; 2571590Srgrimes 2581590Srgrimes batch = 0; 2591590Srgrimes mblksize = sc->rx_buffer_size; 2601590Srgrimes while (ntodo-- > 0) { 2611590Srgrimes unsigned int id; 2621590Srgrimes struct sfxge_rx_sw_desc *rx_desc; 2631590Srgrimes bus_dma_segment_t seg; 2641590Srgrimes struct mbuf *m; 2651590Srgrimes 2661590Srgrimes id = (rxq->added + batch) & rxq->ptr_mask; 2671590Srgrimes rx_desc = &rxq->queue[id]; 2681590Srgrimes KASSERT(rx_desc->mbuf == NULL, ("rx_desc->mbuf != NULL")); 2691590Srgrimes 2701590Srgrimes rx_desc->flags = EFX_DISCARD; 2711590Srgrimes m = rx_desc->mbuf = sfxge_rx_alloc_mbuf(sc); 2721590Srgrimes if (m == NULL) 2731590Srgrimes break; 2741590Srgrimes sfxge_map_mbuf_fast(rxq->mem.esm_tag, rxq->mem.esm_map, m, &seg); 2751590Srgrimes addr[batch++] = seg.ds_addr; 2761590Srgrimes 2771590Srgrimes if (batch == SFXGE_REFILL_BATCH) { 2781590Srgrimes efx_rx_qpost(rxq->common, addr, mblksize, batch, 2791590Srgrimes rxq->completed, rxq->added); 2801590Srgrimes rxq->added += batch; 2811590Srgrimes batch = 0; 2821590Srgrimes } 2831590Srgrimes } 2841590Srgrimes 2851590Srgrimes if (ntodo != 0) 2861590Srgrimes sfxge_rx_schedule_refill(rxq, retrying); 2871590Srgrimes 2881590Srgrimes if (batch != 0) { 2891590Srgrimes efx_rx_qpost(rxq->common, addr, mblksize, batch, 2901590Srgrimes rxq->completed, rxq->added); 2911590Srgrimes rxq->added += batch; 2921590Srgrimes } 2931590Srgrimes 2941590Srgrimes /* Make the descriptors visible to the hardware */ 2951590Srgrimes bus_dmamap_sync(rxq->mem.esm_tag, rxq->mem.esm_map, 2961590Srgrimes 
BUS_DMASYNC_PREWRITE); 2971590Srgrimes 2981590Srgrimes efx_rx_qpush(rxq->common, rxq->added); 2991590Srgrimes} 3001590Srgrimes 3011590Srgrimesvoid 3021590Srgrimessfxge_rx_qrefill(struct sfxge_rxq *rxq) 3031590Srgrimes{ 3041590Srgrimes 3051590Srgrimes if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED)) 3061590Srgrimes return; 3071590Srgrimes 3081590Srgrimes /* Make sure the queue is full */ 3091590Srgrimes sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_TRUE); 3101590Srgrimes} 3111590Srgrimes 3121590Srgrimesstatic void __sfxge_rx_deliver(struct sfxge_softc *sc, struct mbuf *m) 3131590Srgrimes{ 3141590Srgrimes struct ifnet *ifp = sc->ifnet; 3151590Srgrimes 3161590Srgrimes m->m_pkthdr.rcvif = ifp; 3171590Srgrimes m->m_pkthdr.csum_data = 0xffff; 3181590Srgrimes ifp->if_input(ifp, m); 3191590Srgrimes} 3201590Srgrimes 3211590Srgrimesstatic void 3221590Srgrimessfxge_rx_deliver(struct sfxge_softc *sc, struct sfxge_rx_sw_desc *rx_desc) 3231590Srgrimes{ 3241590Srgrimes struct mbuf *m = rx_desc->mbuf; 3251590Srgrimes int flags = rx_desc->flags; 3261590Srgrimes int csum_flags; 3271590Srgrimes 3281590Srgrimes /* Convert checksum flags */ 3291590Srgrimes csum_flags = (flags & EFX_CKSUM_IPV4) ? 
3301590Srgrimes (CSUM_IP_CHECKED | CSUM_IP_VALID) : 0; 3311590Srgrimes if (flags & EFX_CKSUM_TCPUDP) 3321590Srgrimes csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 3331590Srgrimes 3341590Srgrimes /* The hash covers a 4-tuple for TCP only */ 3351590Srgrimes if (flags & EFX_PKT_TCP) { 3361590Srgrimes m->m_pkthdr.flowid = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ, 3371590Srgrimes mtod(m, uint8_t *)); 3381590Srgrimes M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 3391590Srgrimes } 3401590Srgrimes m->m_data += sc->rx_prefix_size; 3411590Srgrimes m->m_len = rx_desc->size - sc->rx_prefix_size; 3421590Srgrimes m->m_pkthdr.len = m->m_len; 3431590Srgrimes m->m_pkthdr.csum_flags = csum_flags; 3441590Srgrimes __sfxge_rx_deliver(sc, rx_desc->mbuf); 3451590Srgrimes 3461590Srgrimes rx_desc->flags = EFX_DISCARD; 3471590Srgrimes rx_desc->mbuf = NULL; 3481590Srgrimes} 3491590Srgrimes 3501590Srgrimes#ifdef SFXGE_LRO 3511590Srgrimes 3521590Srgrimesstatic void 3531590Srgrimessfxge_lro_deliver(struct sfxge_lro_state *st, struct sfxge_lro_conn *c) 3541590Srgrimes{ 3551590Srgrimes struct sfxge_softc *sc = st->sc; 3561590Srgrimes struct mbuf *m = c->mbuf; 3571590Srgrimes struct tcphdr *c_th; 3588874Srgrimes int csum_flags; 3591590Srgrimes 3601590Srgrimes KASSERT(m, ("no mbuf to deliver")); 3611590Srgrimes 3621590Srgrimes ++st->n_bursts; 3631590Srgrimes 3641590Srgrimes /* Finish off packet munging and recalculate IP header checksum. 
*/ 3651590Srgrimes if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) { 3661590Srgrimes struct ip *iph = c->nh; 3671590Srgrimes iph->ip_len = htons(iph->ip_len); 3681590Srgrimes iph->ip_sum = 0; 3691590Srgrimes iph->ip_sum = in_cksum_hdr(iph); 3701590Srgrimes c_th = (struct tcphdr *)(iph + 1); 3711590Srgrimes csum_flags = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR | 3721590Srgrimes CSUM_IP_CHECKED | CSUM_IP_VALID); 3731590Srgrimes } else { 3741590Srgrimes struct ip6_hdr *iph = c->nh; 3751590Srgrimes iph->ip6_plen = htons(iph->ip6_plen); 3761590Srgrimes c_th = (struct tcphdr *)(iph + 1); 3771590Srgrimes csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 3781590Srgrimes } 3791590Srgrimes 3801590Srgrimes c_th->th_win = c->th_last->th_win; 3811590Srgrimes c_th->th_ack = c->th_last->th_ack; 3821590Srgrimes if (c_th->th_off == c->th_last->th_off) { 3831590Srgrimes /* Copy TCP options (take care to avoid going negative). */ 3841590Srgrimes int optlen = ((c_th->th_off - 5) & 0xf) << 2u; 3851590Srgrimes memcpy(c_th + 1, c->th_last + 1, optlen); 3861590Srgrimes } 3871590Srgrimes 3881590Srgrimes m->m_pkthdr.flowid = c->conn_hash; 3891590Srgrimes M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 3901590Srgrimes 3911590Srgrimes m->m_pkthdr.csum_flags = csum_flags; 3921590Srgrimes __sfxge_rx_deliver(sc, m); 3931590Srgrimes 3941590Srgrimes c->mbuf = NULL; 3951590Srgrimes c->delivered = 1; 3961590Srgrimes} 3971590Srgrimes 3981590Srgrimes/* Drop the given connection, and add it to the free list. 
*/ 3991590Srgrimesstatic void sfxge_lro_drop(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c) 4001590Srgrimes{ 4011590Srgrimes unsigned bucket; 4021590Srgrimes 4031590Srgrimes KASSERT(!c->mbuf, ("found orphaned mbuf")); 4041590Srgrimes 4051590Srgrimes if (c->next_buf.mbuf != NULL) { 4061590Srgrimes sfxge_rx_deliver(rxq->sc, &c->next_buf); 4071590Srgrimes LIST_REMOVE(c, active_link); 4081590Srgrimes } 4091590Srgrimes 4101590Srgrimes bucket = c->conn_hash & rxq->lro.conns_mask; 4111590Srgrimes KASSERT(rxq->lro.conns_n[bucket] > 0, ("LRO: bucket fill level wrong")); 4121590Srgrimes --rxq->lro.conns_n[bucket]; 4131590Srgrimes TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link); 4141590Srgrimes TAILQ_INSERT_HEAD(&rxq->lro.free_conns, c, link); 4151590Srgrimes} 4161590Srgrimes 4171590Srgrimes/* Stop tracking connections that have gone idle in order to keep hash 4181590Srgrimes * chains short. 4191590Srgrimes */ 4201590Srgrimesstatic void sfxge_lro_purge_idle(struct sfxge_rxq *rxq, unsigned now) 4211590Srgrimes{ 4221590Srgrimes struct sfxge_lro_conn *c; 4231590Srgrimes unsigned i; 4241590Srgrimes 4251590Srgrimes KASSERT(LIST_EMPTY(&rxq->lro.active_conns), 4261590Srgrimes ("found active connections")); 4271590Srgrimes 4281590Srgrimes rxq->lro.last_purge_ticks = now; 4291590Srgrimes for (i = 0; i <= rxq->lro.conns_mask; ++i) { 4301590Srgrimes if (TAILQ_EMPTY(&rxq->lro.conns[i])) 4311590Srgrimes continue; 4321590Srgrimes 4331590Srgrimes c = TAILQ_LAST(&rxq->lro.conns[i], sfxge_lro_tailq); 4341590Srgrimes if (now - c->last_pkt_ticks > lro_idle_ticks) { 4351590Srgrimes ++rxq->lro.n_drop_idle; 4361590Srgrimes sfxge_lro_drop(rxq, c); 4371590Srgrimes } 4381590Srgrimes } 4391590Srgrimes} 4401590Srgrimes 4411590Srgrimesstatic void 4428874Srgrimessfxge_lro_merge(struct sfxge_lro_state *st, struct sfxge_lro_conn *c, 4431590Srgrimes struct mbuf *mbuf, struct tcphdr *th) 4441590Srgrimes{ 4451590Srgrimes struct tcphdr *c_th; 4461590Srgrimes 4471590Srgrimes /* Tack the new mbuf onto the 
chain. */ 4481590Srgrimes KASSERT(!mbuf->m_next, ("mbuf already chained")); 4491590Srgrimes c->mbuf_tail->m_next = mbuf; 4501590Srgrimes c->mbuf_tail = mbuf; 4511590Srgrimes 4521590Srgrimes /* Increase length appropriately */ 4531590Srgrimes c->mbuf->m_pkthdr.len += mbuf->m_len; 4541590Srgrimes 4551590Srgrimes /* Update the connection state flags */ 4561590Srgrimes if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) { 4571590Srgrimes struct ip *iph = c->nh; 4581590Srgrimes iph->ip_len += mbuf->m_len; 4591590Srgrimes c_th = (struct tcphdr *)(iph + 1); 4601590Srgrimes } else { 4611590Srgrimes struct ip6_hdr *iph = c->nh; 4621590Srgrimes iph->ip6_plen += mbuf->m_len; 4631590Srgrimes c_th = (struct tcphdr *)(iph + 1); 4641590Srgrimes } 4651590Srgrimes c_th->th_flags |= (th->th_flags & TH_PUSH); 4661590Srgrimes c->th_last = th; 4671590Srgrimes ++st->n_merges; 4681590Srgrimes 4691590Srgrimes /* Pass packet up now if another segment could overflow the IP 4701590Srgrimes * length. 4711590Srgrimes */ 4721590Srgrimes if (c->mbuf->m_pkthdr.len > 65536 - 9200) 4731590Srgrimes sfxge_lro_deliver(st, c); 4741590Srgrimes} 4751590Srgrimes 4761590Srgrimesstatic void 4771590Srgrimessfxge_lro_start(struct sfxge_lro_state *st, struct sfxge_lro_conn *c, 4781590Srgrimes struct mbuf *mbuf, void *nh, struct tcphdr *th) 4791590Srgrimes{ 4801590Srgrimes /* Start the chain */ 4811590Srgrimes c->mbuf = mbuf; 4821590Srgrimes c->mbuf_tail = c->mbuf; 4831590Srgrimes c->nh = nh; 4841590Srgrimes c->th_last = th; 4851590Srgrimes 4861590Srgrimes mbuf->m_pkthdr.len = mbuf->m_len; 4871590Srgrimes 4881590Srgrimes /* Mangle header fields for later processing */ 4891590Srgrimes if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) { 4901590Srgrimes struct ip *iph = nh; 4911590Srgrimes iph->ip_len = ntohs(iph->ip_len); 4921590Srgrimes } else { 4931590Srgrimes struct ip6_hdr *iph = nh; 4941590Srgrimes iph->ip6_plen = ntohs(iph->ip6_plen); 4951590Srgrimes } 4961590Srgrimes} 4971590Srgrimes 4981590Srgrimes/* Try to merge or otherwise hold or 
deliver (as appropriate) the 4991590Srgrimes * packet buffered for this connection (c->next_buf). Return a flag 5001590Srgrimes * indicating whether the connection is still active for LRO purposes. 5011590Srgrimes */ 5021590Srgrimesstatic int 5031590Srgrimessfxge_lro_try_merge(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c) 5041590Srgrimes{ 5051590Srgrimes struct sfxge_rx_sw_desc *rx_buf = &c->next_buf; 5061590Srgrimes char *eh = c->next_eh; 5071590Srgrimes int data_length, hdr_length, dont_merge; 5081590Srgrimes unsigned th_seq, pkt_length; 5091590Srgrimes struct tcphdr *th; 5101590Srgrimes unsigned now; 5111590Srgrimes 5121590Srgrimes if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) { 5131590Srgrimes struct ip *iph = c->next_nh; 5141590Srgrimes th = (struct tcphdr *)(iph + 1); 5151590Srgrimes pkt_length = ntohs(iph->ip_len) + (char *) iph - eh; 5161590Srgrimes } else { 5171590Srgrimes struct ip6_hdr *iph = c->next_nh; 5181590Srgrimes th = (struct tcphdr *)(iph + 1); 5191590Srgrimes pkt_length = ntohs(iph->ip6_plen) + (char *) th - eh; 5201590Srgrimes } 5211590Srgrimes 5221590Srgrimes hdr_length = (char *) th + th->th_off * 4 - eh; 5231590Srgrimes data_length = (min(pkt_length, rx_buf->size - rxq->sc->rx_prefix_size) - 5241590Srgrimes hdr_length); 5251590Srgrimes th_seq = ntohl(th->th_seq); 5261590Srgrimes dont_merge = ((data_length <= 0) 5271590Srgrimes | (th->th_flags & (TH_URG | TH_SYN | TH_RST | TH_FIN))); 5281590Srgrimes 5291590Srgrimes /* Check for options other than aligned timestamp. 
*/ 5301590Srgrimes if (th->th_off != 5) { 5311590Srgrimes const uint32_t *opt_ptr = (const uint32_t *) (th + 1); 5321590Srgrimes if (th->th_off == 8 && 5331590Srgrimes opt_ptr[0] == ntohl((TCPOPT_NOP << 24) | 5341590Srgrimes (TCPOPT_NOP << 16) | 5351590Srgrimes (TCPOPT_TIMESTAMP << 8) | 5361590Srgrimes TCPOLEN_TIMESTAMP)) { 5371590Srgrimes /* timestamp option -- okay */ 5381590Srgrimes } else { 5391590Srgrimes dont_merge = 1; 5401590Srgrimes } 5411590Srgrimes } 5421590Srgrimes 5431590Srgrimes if (__predict_false(th_seq != c->next_seq)) { 5441590Srgrimes /* Out-of-order, so start counting again. */ 5451590Srgrimes if (c->mbuf != NULL) 5461590Srgrimes sfxge_lro_deliver(&rxq->lro, c); 5471590Srgrimes c->n_in_order_pkts -= lro_loss_packets; 5481590Srgrimes c->next_seq = th_seq + data_length; 5491590Srgrimes ++rxq->lro.n_misorder; 5501590Srgrimes goto deliver_buf_out; 5511590Srgrimes } 5521590Srgrimes c->next_seq = th_seq + data_length; 5531590Srgrimes 5541590Srgrimes now = ticks; 5551590Srgrimes if (now - c->last_pkt_ticks > lro_idle_ticks) { 5561590Srgrimes ++rxq->lro.n_drop_idle; 5571590Srgrimes if (c->mbuf != NULL) 5581590Srgrimes sfxge_lro_deliver(&rxq->lro, c); 5591590Srgrimes sfxge_lro_drop(rxq, c); 5601590Srgrimes return (0); 5611590Srgrimes } 5621590Srgrimes c->last_pkt_ticks = ticks; 5631590Srgrimes 5641590Srgrimes if (c->n_in_order_pkts < lro_slow_start_packets) { 5651590Srgrimes /* May be in slow-start, so don't merge. 
*/ 5661590Srgrimes ++rxq->lro.n_slow_start; 5671590Srgrimes ++c->n_in_order_pkts; 5681590Srgrimes goto deliver_buf_out; 5691590Srgrimes } 5701590Srgrimes 5711590Srgrimes if (__predict_false(dont_merge)) { 5721590Srgrimes if (c->mbuf != NULL) 5731590Srgrimes sfxge_lro_deliver(&rxq->lro, c); 5741590Srgrimes if (th->th_flags & (TH_FIN | TH_RST)) { 5751590Srgrimes ++rxq->lro.n_drop_closed; 5761590Srgrimes sfxge_lro_drop(rxq, c); 5771590Srgrimes return (0); 5781590Srgrimes } 5791590Srgrimes goto deliver_buf_out; 5801590Srgrimes } 5811590Srgrimes 5821590Srgrimes rx_buf->mbuf->m_data += rxq->sc->rx_prefix_size; 5831590Srgrimes 5841590Srgrimes if (__predict_true(c->mbuf != NULL)) { 5851590Srgrimes /* Remove headers and any padding */ 5861590Srgrimes rx_buf->mbuf->m_data += hdr_length; 5871590Srgrimes rx_buf->mbuf->m_len = data_length; 5881590Srgrimes 5891590Srgrimes sfxge_lro_merge(&rxq->lro, c, rx_buf->mbuf, th); 5901590Srgrimes } else { 5911590Srgrimes /* Remove any padding */ 5921590Srgrimes rx_buf->mbuf->m_len = pkt_length; 5931590Srgrimes 5941590Srgrimes sfxge_lro_start(&rxq->lro, c, rx_buf->mbuf, c->next_nh, th); 5951590Srgrimes } 5961590Srgrimes 5971590Srgrimes rx_buf->mbuf = NULL; 5981590Srgrimes return (1); 5991590Srgrimes 6001590Srgrimes deliver_buf_out: 6011590Srgrimes sfxge_rx_deliver(rxq->sc, rx_buf); 6021590Srgrimes return (1); 6031590Srgrimes} 6041590Srgrimes 6051590Srgrimesstatic void sfxge_lro_new_conn(struct sfxge_lro_state *st, uint32_t conn_hash, 6061590Srgrimes uint16_t l2_id, void *nh, struct tcphdr *th) 6071590Srgrimes{ 6081590Srgrimes unsigned bucket = conn_hash & st->conns_mask; 6091590Srgrimes struct sfxge_lro_conn *c; 6101590Srgrimes 6111590Srgrimes if (st->conns_n[bucket] >= lro_chain_max) { 6121590Srgrimes ++st->n_too_many; 6131590Srgrimes return; 6141590Srgrimes } 6151590Srgrimes 6161590Srgrimes if (!TAILQ_EMPTY(&st->free_conns)) { 6178874Srgrimes c = TAILQ_FIRST(&st->free_conns); 6181590Srgrimes TAILQ_REMOVE(&st->free_conns, c, link); 
6191590Srgrimes } else { 6201590Srgrimes c = malloc(sizeof(*c), M_SFXGE, M_NOWAIT); 6211590Srgrimes if (c == NULL) 6221590Srgrimes return; 6231590Srgrimes c->mbuf = NULL; 6241590Srgrimes c->next_buf.mbuf = NULL; 6251590Srgrimes } 6261590Srgrimes 6271590Srgrimes /* Create the connection tracking data */ 6281590Srgrimes ++st->conns_n[bucket]; 6291590Srgrimes TAILQ_INSERT_HEAD(&st->conns[bucket], c, link); 6301590Srgrimes c->l2_id = l2_id; 6311590Srgrimes c->conn_hash = conn_hash; 6321590Srgrimes c->source = th->th_sport; 6331590Srgrimes c->dest = th->th_dport; 6341590Srgrimes c->n_in_order_pkts = 0; 6351590Srgrimes c->last_pkt_ticks = *(volatile int *)&ticks; 6368874Srgrimes c->delivered = 0; 6371590Srgrimes ++st->n_new_stream; 6381590Srgrimes /* NB. We don't initialise c->next_seq, and it doesn't matter what 6391590Srgrimes * value it has. Most likely the next packet received for this 6401590Srgrimes * connection will not match -- no harm done. 6411590Srgrimes */ 6421590Srgrimes} 6431590Srgrimes 6441590Srgrimes/* Process mbuf and decide whether to dispatch it to the stack now or 6451590Srgrimes * later. 
6461590Srgrimes */ 6471590Srgrimesstatic void 6481590Srgrimessfxge_lro(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_buf) 6491590Srgrimes{ 6501590Srgrimes struct sfxge_softc *sc = rxq->sc; 6511590Srgrimes struct mbuf *m = rx_buf->mbuf; 6521590Srgrimes struct ether_header *eh; 6531590Srgrimes struct sfxge_lro_conn *c; 6541590Srgrimes uint16_t l2_id; 6551590Srgrimes uint16_t l3_proto; 6561590Srgrimes void *nh; 6571590Srgrimes struct tcphdr *th; 6581590Srgrimes uint32_t conn_hash; 6591590Srgrimes unsigned bucket; 6601590Srgrimes 6611590Srgrimes /* Get the hardware hash */ 6621590Srgrimes conn_hash = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_TOEPLITZ, 6631590Srgrimes mtod(m, uint8_t *)); 6641590Srgrimes 6651590Srgrimes eh = (struct ether_header *)(m->m_data + sc->rx_prefix_size); 6661590Srgrimes if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 6671590Srgrimes struct ether_vlan_header *veh = (struct ether_vlan_header *)eh; 6681590Srgrimes l2_id = EVL_VLANOFTAG(ntohs(veh->evl_tag)) | 6691590Srgrimes SFXGE_LRO_L2_ID_VLAN; 6701590Srgrimes l3_proto = veh->evl_proto; 6711590Srgrimes nh = veh + 1; 6721590Srgrimes } else { 6731590Srgrimes l2_id = 0; 6741590Srgrimes l3_proto = eh->ether_type; 6751590Srgrimes nh = eh + 1; 6761590Srgrimes } 6771590Srgrimes 6781590Srgrimes /* Check whether this is a suitable packet (unfragmented 6791590Srgrimes * TCP/IPv4 or TCP/IPv6). If so, find the TCP header and 6801590Srgrimes * length, and compute a hash if necessary. If not, return. 
6811590Srgrimes */ 6821590Srgrimes if (l3_proto == htons(ETHERTYPE_IP)) { 6831590Srgrimes struct ip *iph = nh; 6841590Srgrimes 6851590Srgrimes KASSERT(iph->ip_p == IPPROTO_TCP, 6861590Srgrimes ("IPv4 protocol is not TCP, but packet marker is set")); 6871590Srgrimes if ((iph->ip_hl - (sizeof(*iph) >> 2u)) | 6881590Srgrimes (iph->ip_off & htons(IP_MF | IP_OFFMASK))) 6891590Srgrimes goto deliver_now; 6901590Srgrimes th = (struct tcphdr *)(iph + 1); 6911590Srgrimes } else if (l3_proto == htons(ETHERTYPE_IPV6)) { 6921590Srgrimes struct ip6_hdr *iph = nh; 6931590Srgrimes 6941590Srgrimes KASSERT(iph->ip6_nxt == IPPROTO_TCP, 6951590Srgrimes ("IPv6 next header is not TCP, but packet marker is set")); 6961590Srgrimes l2_id |= SFXGE_LRO_L2_ID_IPV6; 6971590Srgrimes th = (struct tcphdr *)(iph + 1); 6981590Srgrimes } else { 6991590Srgrimes goto deliver_now; 7001590Srgrimes } 7011590Srgrimes 7021590Srgrimes bucket = conn_hash & rxq->lro.conns_mask; 7031590Srgrimes 7041590Srgrimes TAILQ_FOREACH(c, &rxq->lro.conns[bucket], link) { 7051590Srgrimes if ((c->l2_id - l2_id) | (c->conn_hash - conn_hash)) 7061590Srgrimes continue; 7071590Srgrimes if ((c->source - th->th_sport) | (c->dest - th->th_dport)) 7081590Srgrimes continue; 7091590Srgrimes if (c->mbuf != NULL) { 7101590Srgrimes if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) { 7111590Srgrimes struct ip *c_iph, *iph = nh; 7121590Srgrimes c_iph = c->nh; 7131590Srgrimes if ((c_iph->ip_src.s_addr - iph->ip_src.s_addr) | 7141590Srgrimes (c_iph->ip_dst.s_addr - iph->ip_dst.s_addr)) 7151590Srgrimes continue; 7161590Srgrimes } else { 7171590Srgrimes struct ip6_hdr *c_iph, *iph = nh; 7181590Srgrimes c_iph = c->nh; 7191590Srgrimes if (ipv6_addr_cmp(&c_iph->ip6_src, &iph->ip6_src) | 7201590Srgrimes ipv6_addr_cmp(&c_iph->ip6_dst, &iph->ip6_dst)) 7211590Srgrimes continue; 7221590Srgrimes } 7231590Srgrimes } 7241590Srgrimes 7251590Srgrimes /* Re-insert at head of list to reduce lookup time. 
*/ 7261590Srgrimes TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link); 7271590Srgrimes TAILQ_INSERT_HEAD(&rxq->lro.conns[bucket], c, link); 7281590Srgrimes 7291590Srgrimes if (c->next_buf.mbuf != NULL) { 7301590Srgrimes if (!sfxge_lro_try_merge(rxq, c)) 7311590Srgrimes goto deliver_now; 7321590Srgrimes } else { 7331590Srgrimes LIST_INSERT_HEAD(&rxq->lro.active_conns, c, 7341590Srgrimes active_link); 7351590Srgrimes } 7361590Srgrimes c->next_buf = *rx_buf; 7371590Srgrimes c->next_eh = eh; 7381590Srgrimes c->next_nh = nh; 7391590Srgrimes 7401590Srgrimes rx_buf->mbuf = NULL; 7411590Srgrimes rx_buf->flags = EFX_DISCARD; 7421590Srgrimes return; 7431590Srgrimes } 7441590Srgrimes 7451590Srgrimes sfxge_lro_new_conn(&rxq->lro, conn_hash, l2_id, nh, th); 7461590Srgrimes deliver_now: 7471590Srgrimes sfxge_rx_deliver(sc, rx_buf); 748} 749 750static void sfxge_lro_end_of_burst(struct sfxge_rxq *rxq) 751{ 752 struct sfxge_lro_state *st = &rxq->lro; 753 struct sfxge_lro_conn *c; 754 unsigned t; 755 756 while (!LIST_EMPTY(&st->active_conns)) { 757 c = LIST_FIRST(&st->active_conns); 758 if (!c->delivered && c->mbuf != NULL) 759 sfxge_lro_deliver(st, c); 760 if (sfxge_lro_try_merge(rxq, c)) { 761 if (c->mbuf != NULL) 762 sfxge_lro_deliver(st, c); 763 LIST_REMOVE(c, active_link); 764 } 765 c->delivered = 0; 766 } 767 768 t = *(volatile int *)&ticks; 769 if (__predict_false(t != st->last_purge_ticks)) 770 sfxge_lro_purge_idle(rxq, t); 771} 772 773#else /* !SFXGE_LRO */ 774 775static void 776sfxge_lro(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_buf) 777{ 778} 779 780static void 781sfxge_lro_end_of_burst(struct sfxge_rxq *rxq) 782{ 783} 784 785#endif /* SFXGE_LRO */ 786 787void 788sfxge_rx_qcomplete(struct sfxge_rxq *rxq, boolean_t eop) 789{ 790 struct sfxge_softc *sc = rxq->sc; 791 int lro_enabled = sc->ifnet->if_capenable & IFCAP_LRO; 792 unsigned int index; 793 struct sfxge_evq *evq; 794 unsigned int completed; 795 unsigned int level; 796 struct mbuf *m; 797 struct 
sfxge_rx_sw_desc *prev = NULL; 798 799 index = rxq->index; 800 evq = sc->evq[index]; 801 802 SFXGE_EVQ_LOCK_ASSERT_OWNED(evq); 803 804 completed = rxq->completed; 805 while (completed != rxq->pending) { 806 unsigned int id; 807 struct sfxge_rx_sw_desc *rx_desc; 808 809 id = completed++ & rxq->ptr_mask; 810 rx_desc = &rxq->queue[id]; 811 m = rx_desc->mbuf; 812 813 if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED)) 814 goto discard; 815 816 if (rx_desc->flags & (EFX_ADDR_MISMATCH | EFX_DISCARD)) 817 goto discard; 818 819 prefetch_read_many(mtod(m, caddr_t)); 820 821 /* Check for loopback packets */ 822 if (!(rx_desc->flags & EFX_PKT_IPV4) && 823 !(rx_desc->flags & EFX_PKT_IPV6)) { 824 struct ether_header *etherhp; 825 826 /*LINTED*/ 827 etherhp = mtod(m, struct ether_header *); 828 829 if (etherhp->ether_type == 830 htons(SFXGE_ETHERTYPE_LOOPBACK)) { 831 EFSYS_PROBE(loopback); 832 833 rxq->loopback++; 834 goto discard; 835 } 836 } 837 838 /* Pass packet up the stack or into LRO (pipelined) */ 839 if (prev != NULL) { 840 if (lro_enabled && 841 ((prev->flags & (EFX_PKT_TCP | EFX_CKSUM_TCPUDP)) == 842 (EFX_PKT_TCP | EFX_CKSUM_TCPUDP))) 843 sfxge_lro(rxq, prev); 844 else 845 sfxge_rx_deliver(sc, prev); 846 } 847 prev = rx_desc; 848 continue; 849 850discard: 851 /* Return the packet to the pool */ 852 m_free(m); 853 rx_desc->mbuf = NULL; 854 } 855 rxq->completed = completed; 856 857 level = rxq->added - rxq->completed; 858 859 /* Pass last packet up the stack or into LRO */ 860 if (prev != NULL) { 861 if (lro_enabled && 862 ((prev->flags & (EFX_PKT_TCP | EFX_CKSUM_TCPUDP)) == 863 (EFX_PKT_TCP | EFX_CKSUM_TCPUDP))) 864 sfxge_lro(rxq, prev); 865 else 866 sfxge_rx_deliver(sc, prev); 867 } 868 869 /* 870 * If there are any pending flows and this is the end of the 871 * poll then they must be completed. 
872 */ 873 if (eop) 874 sfxge_lro_end_of_burst(rxq); 875 876 /* Top up the queue if necessary */ 877 if (level < rxq->refill_threshold) 878 sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_FALSE); 879} 880 881static void 882sfxge_rx_qstop(struct sfxge_softc *sc, unsigned int index) 883{ 884 struct sfxge_rxq *rxq; 885 struct sfxge_evq *evq; 886 unsigned int count; 887 888 rxq = sc->rxq[index]; 889 evq = sc->evq[index]; 890 891 SFXGE_EVQ_LOCK(evq); 892 893 KASSERT(rxq->init_state == SFXGE_RXQ_STARTED, 894 ("rxq not started")); 895 896 rxq->init_state = SFXGE_RXQ_INITIALIZED; 897 898 callout_stop(&rxq->refill_callout); 899 900again: 901 rxq->flush_state = SFXGE_FLUSH_PENDING; 902 903 /* Flush the receive queue */ 904 efx_rx_qflush(rxq->common); 905 906 SFXGE_EVQ_UNLOCK(evq); 907 908 count = 0; 909 do { 910 /* Spin for 100 ms */ 911 DELAY(100000); 912 913 if (rxq->flush_state != SFXGE_FLUSH_PENDING) 914 break; 915 916 } while (++count < 20); 917 918 SFXGE_EVQ_LOCK(evq); 919 920 if (rxq->flush_state == SFXGE_FLUSH_FAILED) 921 goto again; 922 923 rxq->flush_state = SFXGE_FLUSH_DONE; 924 925 rxq->pending = rxq->added; 926 sfxge_rx_qcomplete(rxq, B_TRUE); 927 928 KASSERT(rxq->completed == rxq->pending, 929 ("rxq->completed != rxq->pending")); 930 931 rxq->added = 0; 932 rxq->pending = 0; 933 rxq->completed = 0; 934 rxq->loopback = 0; 935 936 /* Destroy the common code receive queue. 
*/ 937 efx_rx_qdestroy(rxq->common); 938 939 efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id, 940 EFX_RXQ_NBUFS(sc->rxq_entries)); 941 942 SFXGE_EVQ_UNLOCK(evq); 943} 944 945static int 946sfxge_rx_qstart(struct sfxge_softc *sc, unsigned int index) 947{ 948 struct sfxge_rxq *rxq; 949 efsys_mem_t *esmp; 950 struct sfxge_evq *evq; 951 int rc; 952 953 rxq = sc->rxq[index]; 954 esmp = &rxq->mem; 955 evq = sc->evq[index]; 956 957 KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED, 958 ("rxq->init_state != SFXGE_RXQ_INITIALIZED")); 959 KASSERT(evq->init_state == SFXGE_EVQ_STARTED, 960 ("evq->init_state != SFXGE_EVQ_STARTED")); 961 962 /* Program the buffer table. */ 963 if ((rc = efx_sram_buf_tbl_set(sc->enp, rxq->buf_base_id, esmp, 964 EFX_RXQ_NBUFS(sc->rxq_entries))) != 0) 965 return (rc); 966 967 /* Create the common code receive queue. */ 968 if ((rc = efx_rx_qcreate(sc->enp, index, index, EFX_RXQ_TYPE_DEFAULT, 969 esmp, sc->rxq_entries, rxq->buf_base_id, evq->common, 970 &rxq->common)) != 0) 971 goto fail; 972 973 SFXGE_EVQ_LOCK(evq); 974 975 /* Enable the receive queue. */ 976 efx_rx_qenable(rxq->common); 977 978 rxq->init_state = SFXGE_RXQ_STARTED; 979 980 /* Try to fill the queue from the pool. */ 981 sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(sc->rxq_entries), B_FALSE); 982 983 SFXGE_EVQ_UNLOCK(evq); 984 985 return (0); 986 987fail: 988 efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id, 989 EFX_RXQ_NBUFS(sc->rxq_entries)); 990 return (rc); 991} 992 993void 994sfxge_rx_stop(struct sfxge_softc *sc) 995{ 996 int index; 997 998 /* Stop the receive queue(s) */ 999 index = sc->rxq_count; 1000 while (--index >= 0) 1001 sfxge_rx_qstop(sc, index); 1002 1003 sc->rx_prefix_size = 0; 1004 sc->rx_buffer_size = 0; 1005 1006 efx_rx_fini(sc->enp); 1007} 1008 1009int 1010sfxge_rx_start(struct sfxge_softc *sc) 1011{ 1012 struct sfxge_intr *intr; 1013 int index; 1014 int rc; 1015 1016 intr = &sc->intr; 1017 1018 /* Initialize the common code receive module. 
*/ 1019 if ((rc = efx_rx_init(sc->enp)) != 0) 1020 return (rc); 1021 1022 /* Calculate the receive packet buffer size. */ 1023 sc->rx_prefix_size = EFX_RX_PREFIX_SIZE; 1024 sc->rx_buffer_size = (EFX_MAC_PDU(sc->ifnet->if_mtu) + 1025 sc->rx_prefix_size); 1026 1027 /* Select zone for packet buffers */ 1028 if (sc->rx_buffer_size <= MCLBYTES) 1029 sc->rx_buffer_zone = zone_clust; 1030 else if (sc->rx_buffer_size <= MJUMPAGESIZE) 1031 sc->rx_buffer_zone = zone_jumbop; 1032 else if (sc->rx_buffer_size <= MJUM9BYTES) 1033 sc->rx_buffer_zone = zone_jumbo9; 1034 else 1035 sc->rx_buffer_zone = zone_jumbo16; 1036 1037 /* 1038 * Set up the scale table. Enable all hash types and hash insertion. 1039 */ 1040 for (index = 0; index < SFXGE_RX_SCALE_MAX; index++) 1041 sc->rx_indir_table[index] = index % sc->rxq_count; 1042 if ((rc = efx_rx_scale_tbl_set(sc->enp, sc->rx_indir_table, 1043 SFXGE_RX_SCALE_MAX)) != 0) 1044 goto fail; 1045 (void)efx_rx_scale_mode_set(sc->enp, EFX_RX_HASHALG_TOEPLITZ, 1046 (1 << EFX_RX_HASH_IPV4) | (1 << EFX_RX_HASH_TCPIPV4) | 1047 (1 << EFX_RX_HASH_IPV6) | (1 << EFX_RX_HASH_TCPIPV6), B_TRUE); 1048 1049 if ((rc = efx_rx_scale_toeplitz_ipv4_key_set(sc->enp, toep_key, 1050 sizeof(toep_key))) != 0) 1051 goto fail; 1052 1053 /* Start the receive queue(s). 
*/ 1054 for (index = 0; index < sc->rxq_count; index++) { 1055 if ((rc = sfxge_rx_qstart(sc, index)) != 0) 1056 goto fail2; 1057 } 1058 1059 return (0); 1060 1061fail2: 1062 while (--index >= 0) 1063 sfxge_rx_qstop(sc, index); 1064 1065fail: 1066 efx_rx_fini(sc->enp); 1067 1068 return (rc); 1069} 1070 1071#ifdef SFXGE_LRO 1072 1073static void sfxge_lro_init(struct sfxge_rxq *rxq) 1074{ 1075 struct sfxge_lro_state *st = &rxq->lro; 1076 unsigned i; 1077 1078 st->conns_mask = lro_table_size - 1; 1079 KASSERT(!((st->conns_mask + 1) & st->conns_mask), 1080 ("lro_table_size must be a power of 2")); 1081 st->sc = rxq->sc; 1082 st->conns = malloc((st->conns_mask + 1) * sizeof(st->conns[0]), 1083 M_SFXGE, M_WAITOK); 1084 st->conns_n = malloc((st->conns_mask + 1) * sizeof(st->conns_n[0]), 1085 M_SFXGE, M_WAITOK); 1086 for (i = 0; i <= st->conns_mask; ++i) { 1087 TAILQ_INIT(&st->conns[i]); 1088 st->conns_n[i] = 0; 1089 } 1090 LIST_INIT(&st->active_conns); 1091 TAILQ_INIT(&st->free_conns); 1092} 1093 1094static void sfxge_lro_fini(struct sfxge_rxq *rxq) 1095{ 1096 struct sfxge_lro_state *st = &rxq->lro; 1097 struct sfxge_lro_conn *c; 1098 unsigned i; 1099 1100 /* Return cleanly if sfxge_lro_init() has not been called. 
*/ 1101 if (st->conns == NULL) 1102 return; 1103 1104 KASSERT(LIST_EMPTY(&st->active_conns), ("found active connections")); 1105 1106 for (i = 0; i <= st->conns_mask; ++i) { 1107 while (!TAILQ_EMPTY(&st->conns[i])) { 1108 c = TAILQ_LAST(&st->conns[i], sfxge_lro_tailq); 1109 sfxge_lro_drop(rxq, c); 1110 } 1111 } 1112 1113 while (!TAILQ_EMPTY(&st->free_conns)) { 1114 c = TAILQ_FIRST(&st->free_conns); 1115 TAILQ_REMOVE(&st->free_conns, c, link); 1116 KASSERT(!c->mbuf, ("found orphaned mbuf")); 1117 free(c, M_SFXGE); 1118 } 1119 1120 free(st->conns_n, M_SFXGE); 1121 free(st->conns, M_SFXGE); 1122 st->conns = NULL; 1123} 1124 1125#else 1126 1127static void 1128sfxge_lro_init(struct sfxge_rxq *rxq) 1129{ 1130} 1131 1132static void 1133sfxge_lro_fini(struct sfxge_rxq *rxq) 1134{ 1135} 1136 1137#endif /* SFXGE_LRO */ 1138 1139static void 1140sfxge_rx_qfini(struct sfxge_softc *sc, unsigned int index) 1141{ 1142 struct sfxge_rxq *rxq; 1143 1144 rxq = sc->rxq[index]; 1145 1146 KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED, 1147 ("rxq->init_state != SFXGE_RXQ_INITIALIZED")); 1148 1149 /* Free the context array and the flow table. */ 1150 free(rxq->queue, M_SFXGE); 1151 sfxge_lro_fini(rxq); 1152 1153 /* Release DMA memory. */ 1154 sfxge_dma_free(&rxq->mem); 1155 1156 sc->rxq[index] = NULL; 1157 1158 free(rxq, M_SFXGE); 1159} 1160 1161static int 1162sfxge_rx_qinit(struct sfxge_softc *sc, unsigned int index) 1163{ 1164 struct sfxge_rxq *rxq; 1165 struct sfxge_evq *evq; 1166 efsys_mem_t *esmp; 1167 int rc; 1168 1169 KASSERT(index < sc->rxq_count, ("index >= %d", sc->rxq_count)); 1170 1171 rxq = malloc(sizeof(struct sfxge_rxq), M_SFXGE, M_ZERO | M_WAITOK); 1172 rxq->sc = sc; 1173 rxq->index = index; 1174 rxq->entries = sc->rxq_entries; 1175 rxq->ptr_mask = rxq->entries - 1; 1176 rxq->refill_threshold = RX_REFILL_THRESHOLD(rxq->entries); 1177 1178 sc->rxq[index] = rxq; 1179 esmp = &rxq->mem; 1180 1181 evq = sc->evq[index]; 1182 1183 /* Allocate and zero DMA space. 
*/ 1184 if ((rc = sfxge_dma_alloc(sc, EFX_RXQ_SIZE(sc->rxq_entries), esmp)) != 0) 1185 return (rc); 1186 1187 /* Allocate buffer table entries. */ 1188 sfxge_sram_buf_tbl_alloc(sc, EFX_RXQ_NBUFS(sc->rxq_entries), 1189 &rxq->buf_base_id); 1190 1191 /* Allocate the context array and the flow table. */ 1192 rxq->queue = malloc(sizeof(struct sfxge_rx_sw_desc) * sc->rxq_entries, 1193 M_SFXGE, M_WAITOK | M_ZERO); 1194 sfxge_lro_init(rxq); 1195 1196 callout_init(&rxq->refill_callout, B_TRUE); 1197 1198 rxq->init_state = SFXGE_RXQ_INITIALIZED; 1199 1200 return (0); 1201} 1202 1203static const struct { 1204 const char *name; 1205 size_t offset; 1206} sfxge_rx_stats[] = { 1207#define SFXGE_RX_STAT(name, member) \ 1208 { #name, offsetof(struct sfxge_rxq, member) } 1209#ifdef SFXGE_LRO 1210 SFXGE_RX_STAT(lro_merges, lro.n_merges), 1211 SFXGE_RX_STAT(lro_bursts, lro.n_bursts), 1212 SFXGE_RX_STAT(lro_slow_start, lro.n_slow_start), 1213 SFXGE_RX_STAT(lro_misorder, lro.n_misorder), 1214 SFXGE_RX_STAT(lro_too_many, lro.n_too_many), 1215 SFXGE_RX_STAT(lro_new_stream, lro.n_new_stream), 1216 SFXGE_RX_STAT(lro_drop_idle, lro.n_drop_idle), 1217 SFXGE_RX_STAT(lro_drop_closed, lro.n_drop_closed) 1218#endif 1219}; 1220 1221static int 1222sfxge_rx_stat_handler(SYSCTL_HANDLER_ARGS) 1223{ 1224 struct sfxge_softc *sc = arg1; 1225 unsigned int id = arg2; 1226 unsigned int sum, index; 1227 1228 /* Sum across all RX queues */ 1229 sum = 0; 1230 for (index = 0; index < sc->rxq_count; index++) 1231 sum += *(unsigned int *)((caddr_t)sc->rxq[index] + 1232 sfxge_rx_stats[id].offset); 1233 1234 return (SYSCTL_OUT(req, &sum, sizeof(sum))); 1235} 1236 1237static void 1238sfxge_rx_stat_init(struct sfxge_softc *sc) 1239{ 1240 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev); 1241 struct sysctl_oid_list *stat_list; 1242 unsigned int id; 1243 1244 stat_list = SYSCTL_CHILDREN(sc->stats_node); 1245 1246 for (id = 0; id < nitems(sfxge_rx_stats); id++) { 1247 SYSCTL_ADD_PROC( 1248 ctx, stat_list, 
1249 OID_AUTO, sfxge_rx_stats[id].name, 1250 CTLTYPE_UINT|CTLFLAG_RD, 1251 sc, id, sfxge_rx_stat_handler, "IU", 1252 ""); 1253 } 1254} 1255 1256void 1257sfxge_rx_fini(struct sfxge_softc *sc) 1258{ 1259 int index; 1260 1261 index = sc->rxq_count; 1262 while (--index >= 0) 1263 sfxge_rx_qfini(sc, index); 1264 1265 sc->rxq_count = 0; 1266} 1267 1268int 1269sfxge_rx_init(struct sfxge_softc *sc) 1270{ 1271 struct sfxge_intr *intr; 1272 int index; 1273 int rc; 1274 1275#ifdef SFXGE_LRO 1276 if (!ISP2(lro_table_size)) { 1277 log(LOG_ERR, "%s=%u must be power of 2", 1278 SFXGE_LRO_PARAM(table_size), lro_table_size); 1279 rc = EINVAL; 1280 goto fail_lro_table_size; 1281 } 1282 1283 if (lro_idle_ticks == 0) 1284 lro_idle_ticks = hz / 10 + 1; /* 100 ms */ 1285#endif 1286 1287 intr = &sc->intr; 1288 1289 sc->rxq_count = intr->n_alloc; 1290 1291 KASSERT(intr->state == SFXGE_INTR_INITIALIZED, 1292 ("intr->state != SFXGE_INTR_INITIALIZED")); 1293 1294 /* Initialize the receive queue(s) - one per interrupt. */ 1295 for (index = 0; index < sc->rxq_count; index++) { 1296 if ((rc = sfxge_rx_qinit(sc, index)) != 0) 1297 goto fail; 1298 } 1299 1300 sfxge_rx_stat_init(sc); 1301 1302 return (0); 1303 1304fail: 1305 /* Tear down the receive queue(s). */ 1306 while (--index >= 0) 1307 sfxge_rx_qfini(sc, index); 1308 1309 sc->rxq_count = 0; 1310 1311#ifdef SFXGE_LRO 1312fail_lro_table_size: 1313#endif 1314 return (rc); 1315} 1316