/*-
 * Copyright (c) 2010-2016 Solarflare Communications Inc.
 * All rights reserved.
 *
 * This software was developed in part by Philip Paeps under contract for
 * Solarflare Communications, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are
 * those of the authors and should not be interpreted as representing official
 * policies, either expressed or implied, of the FreeBSD Project.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/dev/sfxge/sfxge_rx.c 342475 2018-12-26 09:29:26Z arybchik $");

#include <sys/types.h>
#include <sys/mbuf.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/limits.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>

#include <machine/in_cksum.h>

#include "common/efx.h"

#include "sfxge.h"
#include "sfxge_rx.h"

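/*
 * Refill threshold: the RX queue is topped up once its fill level drops
 * below 90% of the queue limit (see sfxge_rx_qcomplete()).
 */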
#define	RX_REFILL_THRESHOLD(_entries)	(EFX_RXQ_LIMIT(_entries) * 9 / 10)

#ifdef SFXGE_LRO

SYSCTL_NODE(_hw_sfxge, OID_AUTO, lro, CTLFLAG_RD, NULL,
	    "Large receive offload (LRO) parameters");

#define	SFXGE_LRO_PARAM(_param)	SFXGE_PARAM(lro._param)

/* Size of the LRO hash table.  Must be a power of 2.  A larger table
 * means we can accelerate a larger number of streams.
 */
static unsigned lro_table_size = 128;
TUNABLE_INT(SFXGE_LRO_PARAM(table_size), &lro_table_size);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, table_size, CTLFLAG_RDTUN,
	    &lro_table_size, 0,
	    "Size of the LRO hash table (must be a power of 2)");

/* Maximum length of a hash chain.  If chains get too long then the lookup
 * time increases and may exceed the benefit of LRO.
 */
static unsigned lro_chain_max = 20;
TUNABLE_INT(SFXGE_LRO_PARAM(chain_max), &lro_chain_max);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, chain_max, CTLFLAG_RDTUN,
	    &lro_chain_max, 0,
	    "The maximum length of a hash chain");

/* Maximum time (in ticks) that a connection can be idle before its LRO
 * state is discarded.
 */
static unsigned lro_idle_ticks; /* initialised in sfxge_rx_init() */
TUNABLE_INT(SFXGE_LRO_PARAM(idle_ticks), &lro_idle_ticks);
SYSCTL_UINT(_hw_sfxge_lro, OID_AUTO, idle_ticks, CTLFLAG_RDTUN,
	    &lro_idle_ticks, 0,
	    "The maximum time (in ticks) that a connection can be idle "
	    "before its LRO state is discarded");

/* Number of packets with payload that must arrive in-order before a
 * connection is eligible for LRO.  The idea is we should avoid coalescing
 * segments when the sender is in slow-start because reducing the ACK rate
 * can damage performance.
 */
static int lro_slow_start_packets = 2000;
TUNABLE_INT(SFXGE_LRO_PARAM(slow_start_packets), &lro_slow_start_packets);
SYSCTL_INT(_hw_sfxge_lro, OID_AUTO, slow_start_packets, CTLFLAG_RDTUN,
	    &lro_slow_start_packets, 0,
	    "Number of packets with payload that must arrive in-order before "
	    "a connection is eligible for LRO");

/* Number of packets with payload that must arrive in-order following loss
 * before a connection is eligible for LRO.  The idea is we should avoid
 * coalescing segments when the sender is recovering from loss, because
 * reducing the ACK rate can damage performance.
 */
static int lro_loss_packets = 20;
TUNABLE_INT(SFXGE_LRO_PARAM(loss_packets), &lro_loss_packets);
SYSCTL_INT(_hw_sfxge_lro, OID_AUTO, loss_packets, CTLFLAG_RDTUN,
	    &lro_loss_packets, 0,
	    "Number of packets with payload that must arrive in-order "
	    "following loss before a connection is eligible for LRO");

/* Flags for sfxge_lro_conn::l2_id; must not collide with EVL_VLID_MASK */
#define	SFXGE_LRO_L2_ID_VLAN 0x4000
#define	SFXGE_LRO_L2_ID_IPV6 0x8000
#define	SFXGE_LRO_CONN_IS_VLAN_ENCAP(c) ((c)->l2_id & SFXGE_LRO_L2_ID_VLAN)
#define	SFXGE_LRO_CONN_IS_TCPIPV4(c) (!((c)->l2_id & SFXGE_LRO_L2_ID_IPV6))

/* Compare IPv6 addresses, avoiding conditional branches */
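/* The result is zero if and only if the two addresses are equal; it gives
 * no ordering, since the unsigned subtractions are allowed to wrap.
 */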
static unsigned long ipv6_addr_cmp(const struct in6_addr *left,
				   const struct in6_addr *right)
{
#if LONG_BIT == 64
	const uint64_t *left64 = (const uint64_t *)left;
	const uint64_t *right64 = (const uint64_t *)right;
	return (left64[0] - right64[0]) | (left64[1] - right64[1]);
#else
	return (left->s6_addr32[0] - right->s6_addr32[0]) |
	       (left->s6_addr32[1] - right->s6_addr32[1]) |
	       (left->s6_addr32[2] - right->s6_addr32[2]) |
	       (left->s6_addr32[3] - right->s6_addr32[3]);
#endif
}

#endif	/* SFXGE_LRO */

void
sfxge_rx_qflush_done(struct sfxge_rxq *rxq)
{

	rxq->flush_state = SFXGE_FLUSH_DONE;
}

void
sfxge_rx_qflush_failed(struct sfxge_rxq *rxq)
{

	rxq->flush_state = SFXGE_FLUSH_FAILED;
}

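/* RSS hash key.  This is the standard 40-byte Toeplitz example key
 * published in Microsoft's RSS specification, used here as the default
 * scale key.
 */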
static uint8_t toep_key[] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
};

static void
sfxge_rx_post_refill(void *arg)
{
	struct sfxge_rxq *rxq = arg;
	struct sfxge_softc *sc;
	unsigned int index;
	struct sfxge_evq *evq;
	uint16_t magic;

	sc = rxq->sc;
	index = rxq->index;
	evq = sc->evq[index];
	magic = sfxge_sw_ev_rxq_magic(SFXGE_SW_EV_RX_QREFILL, rxq);

	/* This is guaranteed due to the start/stop order of rx and ev */
	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
	    ("evq not started"));
	KASSERT(rxq->init_state == SFXGE_RXQ_STARTED,
	    ("rxq not started"));
	efx_ev_qpost(evq->common, magic);
}

static void
sfxge_rx_schedule_refill(struct sfxge_rxq *rxq, boolean_t retrying)
{
	/* Initially retry after 100 ms, but back off in case of
	 * repeated failures as we probably have to wait for the
	 * administrator to raise the pool limit. */
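	/* The doubling backoff gives retries after roughly 100 ms,
	 * 200 ms, 400 ms, ..., capped at 10 seconds.
	 */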
	if (retrying)
		rxq->refill_delay = min(rxq->refill_delay * 2, 10 * hz);
	else
		rxq->refill_delay = hz / 10;

	callout_reset_curcpu(&rxq->refill_callout, rxq->refill_delay,
			     sfxge_rx_post_refill, rxq);
}

static struct mbuf *sfxge_rx_alloc_mbuf(struct sfxge_softc *sc)
{
	struct mb_args args;
	struct mbuf *m;

	/* Allocate mbuf structure */
	args.flags = M_PKTHDR;
	args.type = MT_DATA;
	m = (struct mbuf *)uma_zalloc_arg(zone_mbuf, &args, M_NOWAIT);

	/* Allocate (and attach) packet buffer */
	if (m != NULL && !uma_zalloc_arg(sc->rx_buffer_zone, m, M_NOWAIT)) {
		uma_zfree(zone_mbuf, m);
		m = NULL;
	}

	return (m);
}

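/* Buffer addresses are accumulated and posted to the RX ring in batches,
 * to amortise the cost of the efx_rx_qpost() calls below.
 */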
#define	SFXGE_REFILL_BATCH  64

static void
sfxge_rx_qfill(struct sfxge_rxq *rxq, unsigned int target, boolean_t retrying)
{
	struct sfxge_softc *sc;
	unsigned int index;
	struct sfxge_evq *evq;
	unsigned int batch;
	unsigned int rxfill;
	unsigned int mblksize;
	int ntodo;
	efsys_dma_addr_t addr[SFXGE_REFILL_BATCH];

	sc = rxq->sc;
	index = rxq->index;
	evq = sc->evq[index];

	prefetch_read_many(sc->enp);
	prefetch_read_many(rxq->common);

	SFXGE_EVQ_LOCK_ASSERT_OWNED(evq);

	if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED))
		return;

	rxfill = rxq->added - rxq->completed;
	KASSERT(rxfill <= EFX_RXQ_LIMIT(rxq->entries),
	    ("rxfill > EFX_RXQ_LIMIT(rxq->entries)"));
	ntodo = min(EFX_RXQ_LIMIT(rxq->entries) - rxfill, target);
	KASSERT(ntodo <= EFX_RXQ_LIMIT(rxq->entries),
	    ("ntodo > EFX_RXQ_LIMIT(rxq->entries)"));

	if (ntodo == 0)
		return;

	batch = 0;
	mblksize = sc->rx_buffer_size - sc->rx_buffer_align;
	while (ntodo-- > 0) {
		unsigned int id;
		struct sfxge_rx_sw_desc *rx_desc;
		bus_dma_segment_t seg;
		struct mbuf *m;

		id = (rxq->added + batch) & rxq->ptr_mask;
		rx_desc = &rxq->queue[id];
		KASSERT(rx_desc->mbuf == NULL, ("rx_desc->mbuf != NULL"));

		rx_desc->flags = EFX_DISCARD;
		m = rx_desc->mbuf = sfxge_rx_alloc_mbuf(sc);
		if (m == NULL)
			break;

		/* m_len specifies length of area to be mapped for DMA */
		m->m_len  = mblksize;
		m->m_data = (caddr_t)P2ROUNDUP((uintptr_t)m->m_data, CACHE_LINE_SIZE);
		m->m_data += sc->rx_buffer_align;
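		/* The data pointer is now offset so that the IP header
		 * following the RX prefix and Ethernet header will be
		 * 4-byte aligned; see the rx_buffer_align calculation in
		 * sfxge_rx_start().
		 */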

		sfxge_map_mbuf_fast(rxq->mem.esm_tag, rxq->mem.esm_map, m, &seg);
		addr[batch++] = seg.ds_addr;

		if (batch == SFXGE_REFILL_BATCH) {
			efx_rx_qpost(rxq->common, addr, mblksize, batch,
			    rxq->completed, rxq->added);
			rxq->added += batch;
			batch = 0;
		}
	}

	if (ntodo != 0)
		sfxge_rx_schedule_refill(rxq, retrying);

	if (batch != 0) {
		efx_rx_qpost(rxq->common, addr, mblksize, batch,
		    rxq->completed, rxq->added);
		rxq->added += batch;
	}

	/* Make the descriptors visible to the hardware */
	bus_dmamap_sync(rxq->mem.esm_tag, rxq->mem.esm_map,
			BUS_DMASYNC_PREWRITE);

	efx_rx_qpush(rxq->common, rxq->added, &rxq->pushed);

	/* The queue could still be empty if no descriptors were actually
	 * pushed, in which case there will be no event to cause the next
	 * refill, so we must schedule a refill ourselves.
	 */
	if (rxq->pushed == rxq->completed) {
		sfxge_rx_schedule_refill(rxq, retrying);
	}
}

void
sfxge_rx_qrefill(struct sfxge_rxq *rxq)
{

	if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED))
		return;

	/* Make sure the queue is full */
	sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_TRUE);
}

static void __sfxge_rx_deliver(struct sfxge_softc *sc, struct mbuf *m)
{
	struct ifnet *ifp = sc->ifnet;

	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.csum_data = 0xffff;
	ifp->if_input(ifp, m);
}

static void
sfxge_rx_deliver(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_desc)
{
	struct sfxge_softc *sc = rxq->sc;
	struct mbuf *m = rx_desc->mbuf;
	int flags = rx_desc->flags;
	int csum_flags;

	/* Convert checksum flags */
	csum_flags = (flags & EFX_CKSUM_IPV4) ?
		(CSUM_IP_CHECKED | CSUM_IP_VALID) : 0;
	if (flags & EFX_CKSUM_TCPUDP)
		csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;

	/* The hash covers a 4-tuple for TCP only */
	if (flags & EFX_PKT_TCP) {
		m->m_pkthdr.flowid =
			efx_pseudo_hdr_hash_get(rxq->common,
						EFX_RX_HASHALG_TOEPLITZ,
						mtod(m, uint8_t *));
		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
	}
	m->m_data += sc->rx_prefix_size;
	m->m_len = rx_desc->size - sc->rx_prefix_size;
	m->m_pkthdr.len = m->m_len;
	m->m_pkthdr.csum_flags = csum_flags;
	__sfxge_rx_deliver(sc, rx_desc->mbuf);

	rx_desc->flags = EFX_DISCARD;
	rx_desc->mbuf = NULL;
}

#ifdef SFXGE_LRO

static void
sfxge_lro_deliver(struct sfxge_lro_state *st, struct sfxge_lro_conn *c)
{
	struct sfxge_softc *sc = st->sc;
	struct mbuf *m = c->mbuf;
	struct tcphdr *c_th;
	int csum_flags;

	KASSERT(m, ("no mbuf to deliver"));

	++st->n_bursts;

	/* Finish off packet munging and recalculate IP header checksum. */
	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = c->nh;
		iph->ip_len = htons(iph->ip_len);
		iph->ip_sum = 0;
		iph->ip_sum = in_cksum_hdr(iph);
		c_th = (struct tcphdr *)(iph + 1);
		csum_flags = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
			      CSUM_IP_CHECKED | CSUM_IP_VALID);
	} else {
		struct ip6_hdr *iph = c->nh;
		iph->ip6_plen = htons(iph->ip6_plen);
		c_th = (struct tcphdr *)(iph + 1);
		csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
	}

	c_th->th_win = c->th_last->th_win;
	c_th->th_ack = c->th_last->th_ack;
	if (c_th->th_off == c->th_last->th_off) {
		/* Copy TCP options (take care to avoid going negative). */
		int optlen = ((c_th->th_off - 5) & 0xf) << 2u;
		memcpy(c_th + 1, c->th_last + 1, optlen);
	}

	m->m_pkthdr.flowid = c->conn_hash;
	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);

	m->m_pkthdr.csum_flags = csum_flags;
	__sfxge_rx_deliver(sc, m);

	c->mbuf = NULL;
	c->delivered = 1;
}

/* Drop the given connection, and add it to the free list. */
static void sfxge_lro_drop(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
{
	unsigned bucket;

	KASSERT(!c->mbuf, ("found orphaned mbuf"));

	if (c->next_buf.mbuf != NULL) {
		sfxge_rx_deliver(rxq, &c->next_buf);
		LIST_REMOVE(c, active_link);
	}

	bucket = c->conn_hash & rxq->lro.conns_mask;
	KASSERT(rxq->lro.conns_n[bucket] > 0, ("LRO: bucket fill level wrong"));
	--rxq->lro.conns_n[bucket];
	TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
	TAILQ_INSERT_HEAD(&rxq->lro.free_conns, c, link);
}

/* Stop tracking connections that have gone idle in order to keep hash
 * chains short.
 */
static void sfxge_lro_purge_idle(struct sfxge_rxq *rxq, unsigned now)
{
	struct sfxge_lro_conn *c;
	unsigned i;

	KASSERT(LIST_EMPTY(&rxq->lro.active_conns),
		("found active connections"));

	rxq->lro.last_purge_ticks = now;
	for (i = 0; i <= rxq->lro.conns_mask; ++i) {
		if (TAILQ_EMPTY(&rxq->lro.conns[i]))
			continue;

		c = TAILQ_LAST(&rxq->lro.conns[i], sfxge_lro_tailq);
		if (now - c->last_pkt_ticks > lro_idle_ticks) {
			++rxq->lro.n_drop_idle;
			sfxge_lro_drop(rxq, c);
		}
	}
}

static void
sfxge_lro_merge(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
		struct mbuf *mbuf, struct tcphdr *th)
{
	struct tcphdr *c_th;

	/* Tack the new mbuf onto the chain. */
	KASSERT(!mbuf->m_next, ("mbuf already chained"));
	c->mbuf_tail->m_next = mbuf;
	c->mbuf_tail = mbuf;

	/* Increase length appropriately */
	c->mbuf->m_pkthdr.len += mbuf->m_len;

	/* Update the connection state flags */
	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = c->nh;
		iph->ip_len += mbuf->m_len;
		c_th = (struct tcphdr *)(iph + 1);
	} else {
		struct ip6_hdr *iph = c->nh;
		iph->ip6_plen += mbuf->m_len;
		c_th = (struct tcphdr *)(iph + 1);
	}
	c_th->th_flags |= (th->th_flags & TH_PUSH);
	c->th_last = th;
	++st->n_merges;

	/* Pass packet up now if another segment could overflow the IP
	 * length.
	 */
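	/* (The 9200-byte headroom presumably leaves room for one more
	 * jumbo-sized segment below the 64 KB IP length limit.)
	 */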
	if (c->mbuf->m_pkthdr.len > 65536 - 9200)
		sfxge_lro_deliver(st, c);
}

static void
sfxge_lro_start(struct sfxge_lro_state *st, struct sfxge_lro_conn *c,
		struct mbuf *mbuf, void *nh, struct tcphdr *th)
{
	/* Start the chain */
	c->mbuf = mbuf;
	c->mbuf_tail = c->mbuf;
	c->nh = nh;
	c->th_last = th;

	mbuf->m_pkthdr.len = mbuf->m_len;

	/* Mangle header fields for later processing */
	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = nh;
		iph->ip_len = ntohs(iph->ip_len);
	} else {
		struct ip6_hdr *iph = nh;
		iph->ip6_plen = ntohs(iph->ip6_plen);
	}
}

/* Try to merge or otherwise hold or deliver (as appropriate) the
 * packet buffered for this connection (c->next_buf).  Return a flag
 * indicating whether the connection is still active for LRO purposes.
 */
static int
sfxge_lro_try_merge(struct sfxge_rxq *rxq, struct sfxge_lro_conn *c)
{
	struct sfxge_rx_sw_desc *rx_buf = &c->next_buf;
	char *eh = c->next_eh;
	int data_length, hdr_length, dont_merge;
	unsigned th_seq, pkt_length;
	struct tcphdr *th;
	unsigned now;

	if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
		struct ip *iph = c->next_nh;
		th = (struct tcphdr *)(iph + 1);
		pkt_length = ntohs(iph->ip_len) + (char *) iph - eh;
	} else {
		struct ip6_hdr *iph = c->next_nh;
		th = (struct tcphdr *)(iph + 1);
		pkt_length = ntohs(iph->ip6_plen) + (char *) th - eh;
	}

	hdr_length = (char *) th + th->th_off * 4 - eh;
	data_length = (min(pkt_length, rx_buf->size - rxq->sc->rx_prefix_size) -
		       hdr_length);
	th_seq = ntohl(th->th_seq);
	dont_merge = ((data_length <= 0)
		      | (th->th_flags & (TH_URG | TH_SYN | TH_RST | TH_FIN)));

	/* Check for options other than aligned timestamp. */
	if (th->th_off != 5) {
		const uint32_t *opt_ptr = (const uint32_t *) (th + 1);
		if (th->th_off == 8 &&
		    opt_ptr[0] == ntohl((TCPOPT_NOP << 24) |
					(TCPOPT_NOP << 16) |
					(TCPOPT_TIMESTAMP << 8) |
					TCPOLEN_TIMESTAMP)) {
			/* timestamp option -- okay */
		} else {
			dont_merge = 1;
		}
	}

	if (__predict_false(th_seq != c->next_seq)) {
		/* Out-of-order, so start counting again. */
		if (c->mbuf != NULL)
			sfxge_lro_deliver(&rxq->lro, c);
		c->n_in_order_pkts -= lro_loss_packets;
		c->next_seq = th_seq + data_length;
		++rxq->lro.n_misorder;
		goto deliver_buf_out;
	}
	c->next_seq = th_seq + data_length;

	now = ticks;
	if (now - c->last_pkt_ticks > lro_idle_ticks) {
		++rxq->lro.n_drop_idle;
		if (c->mbuf != NULL)
			sfxge_lro_deliver(&rxq->lro, c);
		sfxge_lro_drop(rxq, c);
		return (0);
	}
	c->last_pkt_ticks = ticks;

	if (c->n_in_order_pkts < lro_slow_start_packets) {
		/* May be in slow-start, so don't merge. */
		++rxq->lro.n_slow_start;
		++c->n_in_order_pkts;
		goto deliver_buf_out;
	}

	if (__predict_false(dont_merge)) {
		if (c->mbuf != NULL)
			sfxge_lro_deliver(&rxq->lro, c);
		if (th->th_flags & (TH_FIN | TH_RST)) {
			++rxq->lro.n_drop_closed;
			sfxge_lro_drop(rxq, c);
			return (0);
		}
		goto deliver_buf_out;
	}

	rx_buf->mbuf->m_data += rxq->sc->rx_prefix_size;

	if (__predict_true(c->mbuf != NULL)) {
		/* Remove headers and any padding */
		rx_buf->mbuf->m_data += hdr_length;
		rx_buf->mbuf->m_len = data_length;

		sfxge_lro_merge(&rxq->lro, c, rx_buf->mbuf, th);
	} else {
		/* Remove any padding */
		rx_buf->mbuf->m_len = pkt_length;

		sfxge_lro_start(&rxq->lro, c, rx_buf->mbuf, c->next_nh, th);
	}

	rx_buf->mbuf = NULL;
	return (1);

 deliver_buf_out:
	sfxge_rx_deliver(rxq, rx_buf);
	return (1);
}

static void sfxge_lro_new_conn(struct sfxge_lro_state *st, uint32_t conn_hash,
			       uint16_t l2_id, void *nh, struct tcphdr *th)
{
	unsigned bucket = conn_hash & st->conns_mask;
	struct sfxge_lro_conn *c;

	if (st->conns_n[bucket] >= lro_chain_max) {
		++st->n_too_many;
		return;
	}

	if (!TAILQ_EMPTY(&st->free_conns)) {
		c = TAILQ_FIRST(&st->free_conns);
		TAILQ_REMOVE(&st->free_conns, c, link);
	} else {
		c = malloc(sizeof(*c), M_SFXGE, M_NOWAIT);
		if (c == NULL)
			return;
		c->mbuf = NULL;
		c->next_buf.mbuf = NULL;
	}

	/* Create the connection tracking data */
	++st->conns_n[bucket];
	TAILQ_INSERT_HEAD(&st->conns[bucket], c, link);
	c->l2_id = l2_id;
	c->conn_hash = conn_hash;
	c->source = th->th_sport;
	c->dest = th->th_dport;
	c->n_in_order_pkts = 0;
	c->last_pkt_ticks = *(volatile int *)&ticks;
	c->delivered = 0;
	++st->n_new_stream;
	/* NB. We don't initialise c->next_seq, and it doesn't matter what
	 * value it has.  Most likely the next packet received for this
	 * connection will not match -- no harm done.
	 */
}

/* Process mbuf and decide whether to dispatch it to the stack now or
 * later.
 */
static void
sfxge_lro(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_buf)
{
	struct sfxge_softc *sc = rxq->sc;
	struct mbuf *m = rx_buf->mbuf;
	struct ether_header *eh;
	struct sfxge_lro_conn *c;
	uint16_t l2_id;
	uint16_t l3_proto;
	void *nh;
	struct tcphdr *th;
	uint32_t conn_hash;
	unsigned bucket;

	/* Get the hardware hash */
	conn_hash = efx_pseudo_hdr_hash_get(rxq->common,
					    EFX_RX_HASHALG_TOEPLITZ,
					    mtod(m, uint8_t *));

	eh = (struct ether_header *)(m->m_data + sc->rx_prefix_size);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		struct ether_vlan_header *veh = (struct ether_vlan_header *)eh;
		l2_id = EVL_VLANOFTAG(ntohs(veh->evl_tag)) |
			SFXGE_LRO_L2_ID_VLAN;
		l3_proto = veh->evl_proto;
		nh = veh + 1;
	} else {
		l2_id = 0;
		l3_proto = eh->ether_type;
		nh = eh + 1;
	}

	/* Check whether this is a suitable packet (unfragmented
	 * TCP/IPv4 or TCP/IPv6).  If so, find the TCP header and
	 * length, and compute a hash if necessary.  If not, return.
	 */
	if (l3_proto == htons(ETHERTYPE_IP)) {
		struct ip *iph = nh;

		KASSERT(iph->ip_p == IPPROTO_TCP,
		    ("IPv4 protocol is not TCP, but packet marker is set"));
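		/* Deliver immediately if the header carries IP options
		 * (ip_hl != 5) or the fragment flags/offset are set.
		 */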
		if ((iph->ip_hl - (sizeof(*iph) >> 2u)) |
		    (iph->ip_off & htons(IP_MF | IP_OFFMASK)))
			goto deliver_now;
		th = (struct tcphdr *)(iph + 1);
	} else if (l3_proto == htons(ETHERTYPE_IPV6)) {
		struct ip6_hdr *iph = nh;

		KASSERT(iph->ip6_nxt == IPPROTO_TCP,
		    ("IPv6 next header is not TCP, but packet marker is set"));
		l2_id |= SFXGE_LRO_L2_ID_IPV6;
		th = (struct tcphdr *)(iph + 1);
	} else {
		goto deliver_now;
	}

	bucket = conn_hash & rxq->lro.conns_mask;

	TAILQ_FOREACH(c, &rxq->lro.conns[bucket], link) {
		if ((c->l2_id - l2_id) | (c->conn_hash - conn_hash))
			continue;
		if ((c->source - th->th_sport) | (c->dest - th->th_dport))
			continue;
		if (c->mbuf != NULL) {
			if (SFXGE_LRO_CONN_IS_TCPIPV4(c)) {
				struct ip *c_iph, *iph = nh;
				c_iph = c->nh;
				if ((c_iph->ip_src.s_addr - iph->ip_src.s_addr) |
				    (c_iph->ip_dst.s_addr - iph->ip_dst.s_addr))
					continue;
			} else {
				struct ip6_hdr *c_iph, *iph = nh;
				c_iph = c->nh;
				if (ipv6_addr_cmp(&c_iph->ip6_src, &iph->ip6_src) |
				    ipv6_addr_cmp(&c_iph->ip6_dst, &iph->ip6_dst))
					continue;
			}
		}

		/* Re-insert at head of list to reduce lookup time. */
		TAILQ_REMOVE(&rxq->lro.conns[bucket], c, link);
		TAILQ_INSERT_HEAD(&rxq->lro.conns[bucket], c, link);

		if (c->next_buf.mbuf != NULL) {
			if (!sfxge_lro_try_merge(rxq, c))
				goto deliver_now;
		} else {
			LIST_INSERT_HEAD(&rxq->lro.active_conns, c,
			    active_link);
		}
		c->next_buf = *rx_buf;
		c->next_eh = eh;
		c->next_nh = nh;

		rx_buf->mbuf = NULL;
		rx_buf->flags = EFX_DISCARD;
		return;
	}

	sfxge_lro_new_conn(&rxq->lro, conn_hash, l2_id, nh, th);
 deliver_now:
	sfxge_rx_deliver(rxq, rx_buf);
}

static void sfxge_lro_end_of_burst(struct sfxge_rxq *rxq)
{
	struct sfxge_lro_state *st = &rxq->lro;
	struct sfxge_lro_conn *c;
	unsigned t;

	while (!LIST_EMPTY(&st->active_conns)) {
		c = LIST_FIRST(&st->active_conns);
		if (!c->delivered && c->mbuf != NULL)
			sfxge_lro_deliver(st, c);
		if (sfxge_lro_try_merge(rxq, c)) {
			if (c->mbuf != NULL)
				sfxge_lro_deliver(st, c);
			LIST_REMOVE(c, active_link);
		}
		c->delivered = 0;
	}

	t = *(volatile int *)&ticks;
	if (__predict_false(t != st->last_purge_ticks))
		sfxge_lro_purge_idle(rxq, t);
}

#else	/* !SFXGE_LRO */

static void
sfxge_lro(struct sfxge_rxq *rxq, struct sfxge_rx_sw_desc *rx_buf)
{
}

static void
sfxge_lro_end_of_burst(struct sfxge_rxq *rxq)
{
}

#endif	/* SFXGE_LRO */

void
sfxge_rx_qcomplete(struct sfxge_rxq *rxq, boolean_t eop)
{
	struct sfxge_softc *sc = rxq->sc;
	int if_capenable = sc->ifnet->if_capenable;
	int lro_enabled = if_capenable & IFCAP_LRO;
	unsigned int index;
	struct sfxge_evq *evq;
	unsigned int completed;
	unsigned int level;
	struct mbuf *m;
	struct sfxge_rx_sw_desc *prev = NULL;

	index = rxq->index;
	evq = sc->evq[index];

	SFXGE_EVQ_LOCK_ASSERT_OWNED(evq);

	completed = rxq->completed;
	while (completed != rxq->pending) {
		unsigned int id;
		struct sfxge_rx_sw_desc *rx_desc;

		id = completed++ & rxq->ptr_mask;
		rx_desc = &rxq->queue[id];
		m = rx_desc->mbuf;

		if (__predict_false(rxq->init_state != SFXGE_RXQ_STARTED))
			goto discard;

		if (rx_desc->flags & (EFX_ADDR_MISMATCH | EFX_DISCARD))
			goto discard;

		/* Read the length from the pseudo header if required */
		if (rx_desc->flags & EFX_PKT_PREFIX_LEN) {
			uint16_t tmp_size;
			int rc;
			rc = efx_pseudo_hdr_pkt_length_get(rxq->common,
							   mtod(m, uint8_t *),
							   &tmp_size);
			KASSERT(rc == 0, ("cannot get packet length: %d", rc));
			rx_desc->size = (int)tmp_size + sc->rx_prefix_size;
		}

		prefetch_read_many(mtod(m, caddr_t));

		switch (rx_desc->flags & (EFX_PKT_IPV4 | EFX_PKT_IPV6)) {
		case EFX_PKT_IPV4:
			if (~if_capenable & IFCAP_RXCSUM)
				rx_desc->flags &=
				    ~(EFX_CKSUM_IPV4 | EFX_CKSUM_TCPUDP);
			break;
		case EFX_PKT_IPV6:
			if (~if_capenable & IFCAP_RXCSUM_IPV6)
				rx_desc->flags &= ~EFX_CKSUM_TCPUDP;
			break;
		case 0:
			/* Check for loopback packets */
			{
				struct ether_header *etherhp;

				/*LINTED*/
				etherhp = mtod(m, struct ether_header *);

				if (etherhp->ether_type ==
				    htons(SFXGE_ETHERTYPE_LOOPBACK)) {
					EFSYS_PROBE(loopback);

					rxq->loopback++;
					goto discard;
				}
			}
			break;
		default:
			KASSERT(B_FALSE,
			    ("Rx descriptor with both IPv4 and IPv6 flags"));
			goto discard;
		}

		/* Pass packet up the stack or into LRO (pipelined) */
		if (prev != NULL) {
			if (lro_enabled &&
			    ((prev->flags & (EFX_PKT_TCP | EFX_CKSUM_TCPUDP)) ==
			     (EFX_PKT_TCP | EFX_CKSUM_TCPUDP)))
				sfxge_lro(rxq, prev);
			else
				sfxge_rx_deliver(rxq, prev);
		}
		prev = rx_desc;
		continue;

discard:
		/* Return the packet to the pool */
		m_free(m);
		rx_desc->mbuf = NULL;
	}
	rxq->completed = completed;

	level = rxq->added - rxq->completed;

	/* Pass last packet up the stack or into LRO */
	if (prev != NULL) {
		if (lro_enabled &&
		    ((prev->flags & (EFX_PKT_TCP | EFX_CKSUM_TCPUDP)) ==
		     (EFX_PKT_TCP | EFX_CKSUM_TCPUDP)))
			sfxge_lro(rxq, prev);
		else
			sfxge_rx_deliver(rxq, prev);
	}

	/*
	 * If there are any pending flows and this is the end of the
	 * poll then they must be completed.
	 */
	if (eop)
		sfxge_lro_end_of_burst(rxq);

	/* Top up the queue if necessary */
	if (level < rxq->refill_threshold)
		sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(rxq->entries), B_FALSE);
}

static void
sfxge_rx_qstop(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;
	struct sfxge_evq *evq;
	unsigned int count;
	unsigned int retry = 3;

	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);

	rxq = sc->rxq[index];
	evq = sc->evq[index];

	SFXGE_EVQ_LOCK(evq);

	KASSERT(rxq->init_state == SFXGE_RXQ_STARTED,
	    ("rxq not started"));

	rxq->init_state = SFXGE_RXQ_INITIALIZED;

	callout_stop(&rxq->refill_callout);

	while (rxq->flush_state != SFXGE_FLUSH_DONE && retry != 0) {
		rxq->flush_state = SFXGE_FLUSH_PENDING;

		SFXGE_EVQ_UNLOCK(evq);

		/* Flush the receive queue */
		if (efx_rx_qflush(rxq->common) != 0) {
			SFXGE_EVQ_LOCK(evq);
			rxq->flush_state = SFXGE_FLUSH_FAILED;
			break;
		}

		count = 0;
		do {
			/* Spin for 100 ms */
			DELAY(100000);

			if (rxq->flush_state != SFXGE_FLUSH_PENDING)
				break;

		} while (++count < 20);

		SFXGE_EVQ_LOCK(evq);

		if (rxq->flush_state == SFXGE_FLUSH_PENDING) {
			/* Flush timeout - neither done nor failed */
			log(LOG_ERR, "%s: Cannot flush Rx queue %u\n",
			    device_get_nameunit(sc->dev), index);
			rxq->flush_state = SFXGE_FLUSH_DONE;
		}
		retry--;
	}
	if (rxq->flush_state == SFXGE_FLUSH_FAILED) {
		log(LOG_ERR, "%s: Flushing Rx queue %u failed\n",
		    device_get_nameunit(sc->dev), index);
		rxq->flush_state = SFXGE_FLUSH_DONE;
	}

	rxq->pending = rxq->added;
	sfxge_rx_qcomplete(rxq, B_TRUE);

	KASSERT(rxq->completed == rxq->pending,
	    ("rxq->completed != rxq->pending"));

	rxq->added = 0;
	rxq->pushed = 0;
	rxq->pending = 0;
	rxq->completed = 0;
	rxq->loopback = 0;

	/* Destroy the common code receive queue. */
	efx_rx_qdestroy(rxq->common);

	efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
	    EFX_RXQ_NBUFS(sc->rxq_entries));

	SFXGE_EVQ_UNLOCK(evq);
}

static int
sfxge_rx_qstart(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;
	efsys_mem_t *esmp;
	struct sfxge_evq *evq;
	int rc;

	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);

	rxq = sc->rxq[index];
	esmp = &rxq->mem;
	evq = sc->evq[index];

	KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
	    ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));
	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
	    ("evq->init_state != SFXGE_EVQ_STARTED"));

	/* Program the buffer table. */
	if ((rc = efx_sram_buf_tbl_set(sc->enp, rxq->buf_base_id, esmp,
	    EFX_RXQ_NBUFS(sc->rxq_entries))) != 0)
		return (rc);

	/* Create the common code receive queue. */
	if ((rc = efx_rx_qcreate(sc->enp, index, 0, EFX_RXQ_TYPE_DEFAULT,
	    esmp, sc->rxq_entries, rxq->buf_base_id, evq->common,
	    &rxq->common)) != 0)
		goto fail;

	SFXGE_EVQ_LOCK(evq);

	/* Enable the receive queue. */
	efx_rx_qenable(rxq->common);

	rxq->init_state = SFXGE_RXQ_STARTED;
	rxq->flush_state = SFXGE_FLUSH_REQUIRED;

	/* Try to fill the queue from the pool. */
	sfxge_rx_qfill(rxq, EFX_RXQ_LIMIT(sc->rxq_entries), B_FALSE);

	SFXGE_EVQ_UNLOCK(evq);

	return (0);

fail:
	efx_sram_buf_tbl_clear(sc->enp, rxq->buf_base_id,
	    EFX_RXQ_NBUFS(sc->rxq_entries));
	return (rc);
}

void
sfxge_rx_stop(struct sfxge_softc *sc)
{
	int index;

	efx_mac_filter_default_rxq_clear(sc->enp);

	/* Stop the receive queue(s) */
	index = sc->rxq_count;
	while (--index >= 0)
		sfxge_rx_qstop(sc, index);

	sc->rx_prefix_size = 0;
	sc->rx_buffer_size = 0;

	efx_rx_fini(sc->enp);
}

int
sfxge_rx_start(struct sfxge_softc *sc)
{
	struct sfxge_intr *intr;
	const efx_nic_cfg_t *encp;
	size_t hdrlen, align, reserved;
	int index;
	int rc;

	intr = &sc->intr;

	/* Initialize the common code receive module. */
	if ((rc = efx_rx_init(sc->enp)) != 0)
		return (rc);

	encp = efx_nic_cfg_get(sc->enp);
	sc->rx_buffer_size = EFX_MAC_PDU(sc->ifnet->if_mtu);

	/* Calculate the receive packet buffer size. */
	sc->rx_prefix_size = encp->enc_rx_prefix_size;

	/* Ensure IP headers are 32bit aligned */
	hdrlen = sc->rx_prefix_size + sizeof (struct ether_header);
	sc->rx_buffer_align = P2ROUNDUP(hdrlen, 4) - hdrlen;
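	/* For example, with no RX prefix, hdrlen is the 14-byte Ethernet
	 * header, so rx_buffer_align is 2 and the IP header that follows
	 * lands on a 4-byte boundary.
	 */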

	sc->rx_buffer_size += sc->rx_buffer_align;

	/* Align end of packet buffer for RX DMA end padding */
	align = MAX(1, encp->enc_rx_buf_align_end);
	EFSYS_ASSERT(ISP2(align));
	sc->rx_buffer_size = P2ROUNDUP(sc->rx_buffer_size, align);

	/*
	 * Standard mbuf zones only guarantee pointer-size alignment;
	 * we need extra space to align to the cache line
	 */
	reserved = sc->rx_buffer_size + CACHE_LINE_SIZE;

	/* Select zone for packet buffers */
	if (reserved <= MCLBYTES)
		sc->rx_buffer_zone = zone_clust;
	else if (reserved <= MJUMPAGESIZE)
		sc->rx_buffer_zone = zone_jumbop;
	else if (reserved <= MJUM9BYTES)
		sc->rx_buffer_zone = zone_jumbo9;
	else
		sc->rx_buffer_zone = zone_jumbo16;

	/*
	 * Set up the scale table.  Enable all hash types and hash insertion.
	 */
	for (index = 0; index < nitems(sc->rx_indir_table); index++)
		sc->rx_indir_table[index] = index % sc->rxq_count;
	if ((rc = efx_rx_scale_tbl_set(sc->enp, sc->rx_indir_table,
				       nitems(sc->rx_indir_table))) != 0)
		goto fail;
	(void)efx_rx_scale_mode_set(sc->enp, EFX_RX_HASHALG_TOEPLITZ,
	    EFX_RX_HASH_IPV4 | EFX_RX_HASH_TCPIPV4 |
	    EFX_RX_HASH_IPV6 | EFX_RX_HASH_TCPIPV6, B_TRUE);

	if ((rc = efx_rx_scale_key_set(sc->enp, toep_key,
				       sizeof(toep_key))) != 0)
		goto fail;

	/* Start the receive queue(s). */
	for (index = 0; index < sc->rxq_count; index++) {
		if ((rc = sfxge_rx_qstart(sc, index)) != 0)
			goto fail2;
	}

	rc = efx_mac_filter_default_rxq_set(sc->enp, sc->rxq[0]->common,
					    sc->intr.n_alloc > 1);
	if (rc != 0)
		goto fail3;

	return (0);

fail3:
fail2:
	while (--index >= 0)
		sfxge_rx_qstop(sc, index);

fail:
	efx_rx_fini(sc->enp);

	return (rc);
}

#ifdef SFXGE_LRO

static void sfxge_lro_init(struct sfxge_rxq *rxq)
{
	struct sfxge_lro_state *st = &rxq->lro;
	unsigned i;

	st->conns_mask = lro_table_size - 1;
	KASSERT(!((st->conns_mask + 1) & st->conns_mask),
		("lro_table_size must be a power of 2"));
	st->sc = rxq->sc;
	st->conns = malloc((st->conns_mask + 1) * sizeof(st->conns[0]),
			   M_SFXGE, M_WAITOK);
	st->conns_n = malloc((st->conns_mask + 1) * sizeof(st->conns_n[0]),
			     M_SFXGE, M_WAITOK);
	for (i = 0; i <= st->conns_mask; ++i) {
		TAILQ_INIT(&st->conns[i]);
		st->conns_n[i] = 0;
	}
	LIST_INIT(&st->active_conns);
	TAILQ_INIT(&st->free_conns);
}

static void sfxge_lro_fini(struct sfxge_rxq *rxq)
{
	struct sfxge_lro_state *st = &rxq->lro;
	struct sfxge_lro_conn *c;
	unsigned i;

	/* Return cleanly if sfxge_lro_init() has not been called. */
	if (st->conns == NULL)
		return;

	KASSERT(LIST_EMPTY(&st->active_conns), ("found active connections"));

	for (i = 0; i <= st->conns_mask; ++i) {
		while (!TAILQ_EMPTY(&st->conns[i])) {
			c = TAILQ_LAST(&st->conns[i], sfxge_lro_tailq);
			sfxge_lro_drop(rxq, c);
		}
	}

	while (!TAILQ_EMPTY(&st->free_conns)) {
		c = TAILQ_FIRST(&st->free_conns);
		TAILQ_REMOVE(&st->free_conns, c, link);
		KASSERT(!c->mbuf, ("found orphaned mbuf"));
		free(c, M_SFXGE);
	}

	free(st->conns_n, M_SFXGE);
	free(st->conns, M_SFXGE);
	st->conns = NULL;
}

#else

static void
sfxge_lro_init(struct sfxge_rxq *rxq)
{
}

static void
sfxge_lro_fini(struct sfxge_rxq *rxq)
{
}

#endif	/* SFXGE_LRO */

static void
sfxge_rx_qfini(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;

	rxq = sc->rxq[index];

	KASSERT(rxq->init_state == SFXGE_RXQ_INITIALIZED,
	    ("rxq->init_state != SFXGE_RXQ_INITIALIZED"));

	/* Free the context array and the flow table. */
	free(rxq->queue, M_SFXGE);
	sfxge_lro_fini(rxq);

	/* Release DMA memory. */
	sfxge_dma_free(&rxq->mem);

	sc->rxq[index] = NULL;

	free(rxq, M_SFXGE);
}

static int
sfxge_rx_qinit(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_rxq *rxq;
	struct sfxge_evq *evq;
	efsys_mem_t *esmp;
	int rc;

	KASSERT(index < sc->rxq_count, ("index >= %d", sc->rxq_count));

	rxq = malloc(sizeof(struct sfxge_rxq), M_SFXGE, M_ZERO | M_WAITOK);
	rxq->sc = sc;
	rxq->index = index;
	rxq->entries = sc->rxq_entries;
	rxq->ptr_mask = rxq->entries - 1;
	rxq->refill_threshold = RX_REFILL_THRESHOLD(rxq->entries);

	sc->rxq[index] = rxq;
	esmp = &rxq->mem;

	evq = sc->evq[index];

	/* Allocate and zero DMA space. */
	if ((rc = sfxge_dma_alloc(sc, EFX_RXQ_SIZE(sc->rxq_entries), esmp)) != 0)
		return (rc);

	/* Allocate buffer table entries. */
	sfxge_sram_buf_tbl_alloc(sc, EFX_RXQ_NBUFS(sc->rxq_entries),
				 &rxq->buf_base_id);

	/* Allocate the context array and the flow table. */
	rxq->queue = malloc(sizeof(struct sfxge_rx_sw_desc) * sc->rxq_entries,
	    M_SFXGE, M_WAITOK | M_ZERO);
	sfxge_lro_init(rxq);

	callout_init(&rxq->refill_callout, B_TRUE);

	rxq->init_state = SFXGE_RXQ_INITIALIZED;

	return (0);
}

static const struct {
	const char *name;
	size_t offset;
} sfxge_rx_stats[] = {
#define	SFXGE_RX_STAT(name, member) \
	{ #name, offsetof(struct sfxge_rxq, member) }
#ifdef SFXGE_LRO
	SFXGE_RX_STAT(lro_merges, lro.n_merges),
	SFXGE_RX_STAT(lro_bursts, lro.n_bursts),
	SFXGE_RX_STAT(lro_slow_start, lro.n_slow_start),
	SFXGE_RX_STAT(lro_misorder, lro.n_misorder),
	SFXGE_RX_STAT(lro_too_many, lro.n_too_many),
	SFXGE_RX_STAT(lro_new_stream, lro.n_new_stream),
	SFXGE_RX_STAT(lro_drop_idle, lro.n_drop_idle),
	SFXGE_RX_STAT(lro_drop_closed, lro.n_drop_closed)
#endif
};

static int
sfxge_rx_stat_handler(SYSCTL_HANDLER_ARGS)
{
	struct sfxge_softc *sc = arg1;
	unsigned int id = arg2;
	unsigned int sum, index;

	/* Sum across all RX queues */
	sum = 0;
	for (index = 0; index < sc->rxq_count; index++)
		sum += *(unsigned int *)((caddr_t)sc->rxq[index] +
					 sfxge_rx_stats[id].offset);

	return (SYSCTL_OUT(req, &sum, sizeof(sum)));
}

static void
sfxge_rx_stat_init(struct sfxge_softc *sc)
{
	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
	struct sysctl_oid_list *stat_list;
	unsigned int id;

	stat_list = SYSCTL_CHILDREN(sc->stats_node);

	for (id = 0; id < nitems(sfxge_rx_stats); id++) {
		SYSCTL_ADD_PROC(
			ctx, stat_list,
			OID_AUTO, sfxge_rx_stats[id].name,
			CTLTYPE_UINT|CTLFLAG_RD,
			sc, id, sfxge_rx_stat_handler, "IU",
			"");
	}
}

void
sfxge_rx_fini(struct sfxge_softc *sc)
{
	int index;

	index = sc->rxq_count;
	while (--index >= 0)
		sfxge_rx_qfini(sc, index);

	sc->rxq_count = 0;
}

int
sfxge_rx_init(struct sfxge_softc *sc)
{
	struct sfxge_intr *intr;
	int index;
	int rc;

#ifdef SFXGE_LRO
	if (!ISP2(lro_table_size)) {
		log(LOG_ERR, "%s=%u must be a power of 2",
		    SFXGE_LRO_PARAM(table_size), lro_table_size);
		rc = EINVAL;
		goto fail_lro_table_size;
	}

	if (lro_idle_ticks == 0)
		lro_idle_ticks = hz / 10 + 1; /* 100 ms */
#endif

	intr = &sc->intr;

	sc->rxq_count = intr->n_alloc;

	KASSERT(intr->state == SFXGE_INTR_INITIALIZED,
	    ("intr->state != SFXGE_INTR_INITIALIZED"));

	/* Initialize the receive queue(s) - one per interrupt. */
	for (index = 0; index < sc->rxq_count; index++) {
		if ((rc = sfxge_rx_qinit(sc, index)) != 0)
			goto fail;
	}

	sfxge_rx_stat_init(sc);

	return (0);

fail:
	/* Tear down the receive queue(s). */
	while (--index >= 0)
		sfxge_rx_qfini(sc, index);

	sc->rxq_count = 0;

#ifdef SFXGE_LRO
fail_lro_table_size:
#endif
	return (rc);
}