/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2016 Nicole Graziano <nicole@nextbsd.org>
 * Copyright (c) 2017 Matthew Macy <mmacy@mattmacy.io>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "if_em.h"

#ifdef RSS
#include <net/rss_config.h>
#include <netinet/in_rss.h>
#endif

#ifdef VERBOSE_DEBUG
#define DPRINTF device_printf
#else
#define DPRINTF(...)
#endif

/*********************************************************************
 *  Local Function prototypes
 *********************************************************************/
static int em_tso_setup(struct e1000_softc *sc, if_pkt_info_t pi,
    uint32_t *txd_upper, uint32_t *txd_lower);
static int em_transmit_checksum_setup(struct e1000_softc *sc,
    if_pkt_info_t pi, uint32_t *txd_upper, uint32_t *txd_lower);
static int em_isc_txd_encap(void *arg, if_pkt_info_t pi);
static void em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx);
static int em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear);
static void em_isc_rxd_refill(void *arg, if_rxd_update_t iru);
static void em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused,
    qidx_t pidx);
static int em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx,
    qidx_t budget);
static int em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);

static void lem_isc_rxd_refill(void *arg, if_rxd_update_t iru);

static int lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx,
    qidx_t budget);
static int lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);

static void em_receive_checksum(uint16_t, uint8_t, if_rxd_info_t);
static int em_determine_rsstype(uint32_t pkt_info);
extern int em_intr(void *arg);

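/*
 * iflib TX/RX operation tables.  em_txrx is used by adapters that write
 * back extended RX descriptors, while lem_txrx serves the older
 * legacy-descriptor (8254x-era "lem") parts.  The TX side is shared
 * between the two.
 */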
struct if_txrx em_txrx = {
	.ift_txd_encap = em_isc_txd_encap,
	.ift_txd_flush = em_isc_txd_flush,
	.ift_txd_credits_update = em_isc_txd_credits_update,
	.ift_rxd_available = em_isc_rxd_available,
	.ift_rxd_pkt_get = em_isc_rxd_pkt_get,
	.ift_rxd_refill = em_isc_rxd_refill,
	.ift_rxd_flush = em_isc_rxd_flush,
	.ift_legacy_intr = em_intr
};

struct if_txrx lem_txrx = {
	.ift_txd_encap = em_isc_txd_encap,
	.ift_txd_flush = em_isc_txd_flush,
	.ift_txd_credits_update = em_isc_txd_credits_update,
	.ift_rxd_available = lem_isc_rxd_available,
	.ift_rxd_pkt_get = lem_isc_rxd_pkt_get,
	.ift_rxd_refill = lem_isc_rxd_refill,
	.ift_rxd_flush = em_isc_rxd_flush,
	.ift_legacy_intr = em_intr
};

extern if_shared_ctx_t em_sctx;

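/*
 * Debug helper: dump the state of each TX queue's Report Status (RS)
 * tracking ring alongside the DD bits the hardware has written back.
 */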
void
em_dump_rs(struct e1000_softc *sc)
{
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que;
	struct tx_ring *txr;
	qidx_t i, ntxd, qid, cur;
	int16_t rs_cidx;
	uint8_t status;

	printf("\n");
	ntxd = scctx->isc_ntxd[0];
	for (qid = 0; qid < sc->tx_num_queues; qid++) {
		que = &sc->tx_queues[qid];
		txr = &que->txr;
		rs_cidx = txr->tx_rs_cidx;
		if (rs_cidx != txr->tx_rs_pidx) {
			cur = txr->tx_rsq[rs_cidx];
			status = txr->tx_base[cur].upper.fields.status;
			if (!(status & E1000_TXD_STAT_DD))
				printf("qid[%d]->tx_rsq[%d]: %d clear ",
				    qid, rs_cidx, cur);
		} else {
			rs_cidx = (rs_cidx - 1) & (ntxd - 1);
			cur = txr->tx_rsq[rs_cidx];
			printf("qid[%d]->tx_rsq[rs_cidx-1=%d]: %d  ",
			    qid, rs_cidx, cur);
		}
		printf("cidx_prev=%d rs_pidx=%d ", txr->tx_cidx_processed,
		    txr->tx_rs_pidx);
		for (i = 0; i < ntxd; i++) {
			if (txr->tx_base[i].upper.fields.status &
			    E1000_TXD_STAT_DD)
				printf("%d set ", i);
		}
		printf("\n");
	}
}

/**********************************************************************
 *
 *  Setup work for hardware segmentation offload (TSO) on
 *  adapters using advanced tx descriptors
 *
 **********************************************************************/
static int
em_tso_setup(struct e1000_softc *sc, if_pkt_info_t pi, uint32_t *txd_upper,
    uint32_t *txd_lower)
{
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
	struct tx_ring *txr = &que->txr;
	struct e1000_context_desc *TXD;
	int cur, hdr_len;
	uint32_t cmd_type_len;

	hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
		      E1000_TXD_DTYP_D |	/* Data descr type */
		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */

	cur = pi->ipi_pidx;
	TXD = (struct e1000_context_desc *)&txr->tx_base[cur];

	/*
	 * ipcss - Start offset for header checksum calculation.
	 * ipcse - End offset for header checksum calculation.
	 * ipcso - Offset of place to put the checksum.
	 */
	switch (pi->ipi_etype) {
	case ETHERTYPE_IP:
		/* IP and/or TCP header checksum calculation and insertion. */
		*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;

		TXD->lower_setup.ip_fields.ipcse =
		    htole16(pi->ipi_ehdrlen + pi->ipi_ip_hlen - 1);
		break;
	case ETHERTYPE_IPV6:
		/* TCP header checksum calculation and insertion. */
		*txd_upper = E1000_TXD_POPTS_TXSM << 8;

		TXD->lower_setup.ip_fields.ipcse = htole16(0);
		break;
	default:
		break;
	}
	TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen;
	TXD->lower_setup.ip_fields.ipcso =
	    pi->ipi_ehdrlen + offsetof(struct ip, ip_sum);

	/*
	 * tucss - Start offset for payload checksum calculation.
	 * tucse - End offset for payload checksum calculation.
	 * tucso - Offset of place to put the checksum.
	 */
	TXD->upper_setup.tcp_fields.tucss = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
	TXD->upper_setup.tcp_fields.tucse = 0;
	TXD->upper_setup.tcp_fields.tucso =
	    pi->ipi_ehdrlen + pi->ipi_ip_hlen + offsetof(struct tcphdr, th_sum);

	/*
	 * Payload size per packet w/o any headers.
	 * Length of all headers up to payload.
	 */
	TXD->tcp_seg_setup.fields.mss = htole16(pi->ipi_tso_segsz);
	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;

	/*
	 * Per "PCI/PCI-X SDM 4.0" page 45 and "PCIe GbE SDM 2.5" page 63:
	 * - Set up the basic TUCMD bits.
	 * - The IP bit set indicates IPv4, while clear indicates IPv6.
	 */
	cmd_type_len = sc->txd_cmd |
	    E1000_TXD_CMD_DEXT | /* Extended descr */
	    E1000_TXD_CMD_TSE |  /* TSE context */
	    E1000_TXD_CMD_TCP;   /* Do TCP checksum */
	if (pi->ipi_etype == ETHERTYPE_IP)
		cmd_type_len |= E1000_TXD_CMD_IP;
	TXD->cmd_and_length = htole32(cmd_type_len |
	    (pi->ipi_len - hdr_len)); /* Total len */

	txr->tx_tso = true;

	if (++cur == scctx->isc_ntxd[0]) {
		cur = 0;
	}
	DPRINTF(iflib_get_dev(sc->ctx), "%s: pidx: %d cur: %d\n", __FUNCTION__,
	    pi->ipi_pidx, cur);
	return (cur);
}

/*********************************************************************
 *  The offload context is protocol specific (TCP/UDP) and thus
 *  only needs to be set when the protocol changes. Even so, a
 *  context change can be a performance detriment, and it may be
 *  better to disable them entirely. The reason lies in the way
 *  the controller pipelines requests from the Tx data DMA: up to
 *  four requests can be pipelined, and they may belong to the same
 *  packet or to multiple packets. However, all requests for one
 *  packet are issued before any request is issued for a subsequent
 *  packet, and if a request for the next packet requires a context
 *  change, that request is stalled until the previous one
 *  completes. Setting up a new context therefore effectively
 *  disables pipelined Tx data DMA, which in turn greatly slows
 *  down performance when sending small frames.
 **********************************************************************/
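/*
 * When DONT_FORCE_CTX is nonzero, em_transmit_checksum_setup() reuses the
 * previously programmed checksum context on single-queue adapters whenever
 * the header layout and csum flags are unchanged; setting it to 0 would
 * emit a fresh context descriptor for every packet.
 */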
#define DONT_FORCE_CTX 1

static int
em_transmit_checksum_setup(struct e1000_softc *sc, if_pkt_info_t pi,
    uint32_t *txd_upper, uint32_t *txd_lower)
{
	struct e1000_context_desc *TXD = NULL;
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
	struct tx_ring *txr = &que->txr;
	int csum_flags = pi->ipi_csum_flags;
	int cur, hdr_len;
	uint32_t cmd;

	cur = pi->ipi_pidx;
	hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
	cmd = sc->txd_cmd;

	/*
	 * The 82574L can only remember the *last* context used,
	 * regardless of which queue it was used for.  We cannot reuse
	 * contexts on this hardware platform and must generate a new
	 * context every time.  82574L hardware spec, section 7.2.6,
	 * second note.
	 */
	if (DONT_FORCE_CTX &&
	    sc->tx_num_queues == 1 &&
	    txr->csum_lhlen == pi->ipi_ehdrlen &&
	    txr->csum_iphlen == pi->ipi_ip_hlen &&
	    txr->csum_flags == csum_flags) {
		/*
		 * Same csum offload context as the previous packets;
		 * just return.
		 */
		*txd_upper = txr->csum_txd_upper;
		*txd_lower = txr->csum_txd_lower;
		return (cur);
	}

	TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
	/*
	 * ipcss - Start offset for header checksum calculation.
	 * ipcse - End offset for header checksum calculation.
	 * ipcso - Offset of place to put the checksum.
	 *
	 * We set ipcsX values regardless of IP version to work around HW
	 * issues, and ipcse must be 0 for IPv6 per "PCIe GbE SDM 2.5"
	 * page 61.  IXSM controls whether it's inserted.
	 */
	TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen;
	TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen +
	    offsetof(struct ip, ip_sum);
	if (csum_flags & CSUM_IP) {
		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len - 1);
		cmd |= E1000_TXD_CMD_IP;
	} else if (csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP))
		TXD->lower_setup.ip_fields.ipcse = htole16(0);

	/*
	 * tucss - Start offset for payload checksum calculation.
	 * tucse - End offset for payload checksum calculation.
	 * tucso - Offset of place to put the checksum.
	 */
	if (csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_IP6_TCP | CSUM_IP6_UDP)) {
		uint8_t tucso;

		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;

		if (csum_flags & (CSUM_TCP | CSUM_IP6_TCP)) {
			tucso = hdr_len + offsetof(struct tcphdr, th_sum);
			cmd |= E1000_TXD_CMD_TCP;
		} else
			tucso = hdr_len + offsetof(struct udphdr, uh_sum);
		TXD->upper_setup.tcp_fields.tucss = hdr_len;
		TXD->upper_setup.tcp_fields.tucse = htole16(0);
		TXD->upper_setup.tcp_fields.tucso = tucso;
	}

	txr->csum_lhlen = pi->ipi_ehdrlen;
	txr->csum_iphlen = pi->ipi_ip_hlen;
	txr->csum_flags = csum_flags;
	txr->csum_txd_upper = *txd_upper;
	txr->csum_txd_lower = *txd_lower;

	TXD->tcp_seg_setup.data = htole32(0);
	TXD->cmd_and_length =
	    htole32(E1000_TXD_CMD_IFCS | E1000_TXD_CMD_DEXT | cmd);

	if (++cur == scctx->isc_ntxd[0]) {
		cur = 0;
	}
	DPRINTF(iflib_get_dev(sc->ctx),
	    "checksum_setup csum_flags=%x txd_upper=%x txd_lower=%x hdr_len=%d cmd=%x\n",
	    csum_flags, *txd_upper, *txd_lower, hdr_len, cmd);
	return (cur);
}

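/*
 * Number of bytes held back for the trailing sentinel data descriptor
 * used to work around premature descriptor write-back after a TSO burst
 * (see the TSO workaround logic in em_isc_txd_encap() below).
 */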
#define TSO_WORKAROUND 4 /* TSO sentinel descriptor */

static int
em_isc_txd_encap(void *arg, if_pkt_info_t pi)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
	struct tx_ring *txr = &que->txr;
	bus_dma_segment_t *segs = pi->ipi_segs;
	int nsegs = pi->ipi_nsegs;
	int csum_flags = pi->ipi_csum_flags;
	int i, j, first, pidx_last;
	uint32_t txd_flags, txd_upper = 0, txd_lower = 0;

	struct e1000_tx_desc *ctxd = NULL;
	bool do_tso, tso_desc;
	qidx_t ntxd;

	txd_flags = pi->ipi_flags & IPI_TX_INTR ? E1000_TXD_CMD_RS : 0;
	i = first = pi->ipi_pidx;
	do_tso = (csum_flags & CSUM_TSO);
	tso_desc = false;
	ntxd = scctx->isc_ntxd[0];
	/*
	 * TSO hardware workaround: if this packet is not TSO, is only
	 * a single descriptor long, and follows a TSO burst, then we
	 * need to add a sentinel descriptor to prevent premature
	 * writeback.
	 */
	if ((!do_tso) && (txr->tx_tso == true)) {
		if (nsegs == 1)
			tso_desc = true;
		txr->tx_tso = false;
	}

	/* Do hardware assists */
	if (do_tso) {
		i = em_tso_setup(sc, pi, &txd_upper, &txd_lower);
		tso_desc = true;
	} else if (csum_flags & EM_CSUM_OFFLOAD) {
		i = em_transmit_checksum_setup(sc, pi, &txd_upper, &txd_lower);
	}

	if (pi->ipi_mflags & M_VLANTAG) {
		/* Set the vlan id. */
		txd_upper |= htole16(pi->ipi_vtag) << 16;
		/* Tell hardware to add tag */
		txd_lower |= htole32(E1000_TXD_CMD_VLE);
	}

	DPRINTF(iflib_get_dev(sc->ctx),
	    "encap: set up tx: nsegs=%d first=%d i=%d\n", nsegs, first, i);
	/* XXX sc->pcix_82544 -- lem_fill_descriptors */

	/* Set up our transmit descriptors */
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;
		uint32_t cmd;

		ctxd = &txr->tx_base[i];
		seg_addr = segs[j].ds_addr;
		seg_len = segs[j].ds_len;
		cmd = E1000_TXD_CMD_IFCS | sc->txd_cmd;

		/*
		 * TSO workaround:
		 * If this is the last descriptor, split it so that we
		 * have a small final sentinel.
		 */
		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
			seg_len -= TSO_WORKAROUND;
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(cmd | txd_lower | seg_len);
			ctxd->upper.data = htole32(txd_upper);

			if (++i == scctx->isc_ntxd[0])
				i = 0;

			/* Now make the sentinel */
			ctxd = &txr->tx_base[i];
			ctxd->buffer_addr = htole64(seg_addr + seg_len);
			ctxd->lower.data =
			    htole32(cmd | txd_lower | TSO_WORKAROUND);
			ctxd->upper.data = htole32(txd_upper);
			pidx_last = i;
			if (++i == scctx->isc_ntxd[0])
				i = 0;
			DPRINTF(iflib_get_dev(sc->ctx),
			    "TSO path pidx_last=%d i=%d ntxd[0]=%d\n",
			    pidx_last, i, scctx->isc_ntxd[0]);
		} else {
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(cmd | txd_lower | seg_len);
			ctxd->upper.data = htole32(txd_upper);
			pidx_last = i;
			if (++i == scctx->isc_ntxd[0])
				i = 0;
			DPRINTF(iflib_get_dev(sc->ctx),
			    "pidx_last=%d i=%d ntxd[0]=%d\n",
			    pidx_last, i, scctx->isc_ntxd[0]);
		}
	}

	/*
	 * The last descriptor of the packet needs the End Of Packet
	 * (EOP) bit and, when an interrupt was requested, Report
	 * Status (RS).
	 */
	if (txd_flags && nsegs) {
		txr->tx_rsq[txr->tx_rs_pidx] = pidx_last;
		DPRINTF(iflib_get_dev(sc->ctx),
		    "setting to RS on %d rs_pidx %d first: %d\n",
		    pidx_last, txr->tx_rs_pidx, first);
		txr->tx_rs_pidx = (txr->tx_rs_pidx + 1) & (ntxd - 1);
		MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx);
	}
	ctxd->lower.data |= htole32(E1000_TXD_CMD_EOP | txd_flags);
	DPRINTF(iflib_get_dev(sc->ctx),
	    "tx_buffers[%d]->eop = %d ipi_new_pidx=%d\n", first, pidx_last, i);
	pi->ipi_new_pidx = i;

	return (0);
}

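/* Notify the hardware of newly queued descriptors by advancing the tail. */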
static void
em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx)
{
	struct e1000_softc *sc = arg;
	struct em_tx_queue *que = &sc->tx_queues[txqid];
	struct tx_ring *txr = &que->txr;

	E1000_WRITE_REG(&sc->hw, E1000_TDT(txr->me), pidx);
}

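/*
 * Walk the Report Status queue and count descriptors the hardware has
 * completed (DD set).  With clear == false this only reports whether any
 * work is reclaimable; with clear == true it also advances the consumed
 * indices so iflib can recycle the descriptors.
 */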
static int
em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[txqid];
	struct tx_ring *txr = &que->txr;

	qidx_t processed = 0;
	int updated;
	qidx_t cur, prev, ntxd, rs_cidx;
	int32_t delta;
	uint8_t status;

	rs_cidx = txr->tx_rs_cidx;
	if (rs_cidx == txr->tx_rs_pidx)
		return (0);
	cur = txr->tx_rsq[rs_cidx];
	MPASS(cur != QIDX_INVALID);
	status = txr->tx_base[cur].upper.fields.status;
	updated = !!(status & E1000_TXD_STAT_DD);

	if (!updated)
		return (0);

	/*
	 * If clear is false, just let the caller know that there
	 * are descriptors to reclaim.
	 */
	if (!clear)
		return (1);

	prev = txr->tx_cidx_processed;
	ntxd = scctx->isc_ntxd[0];
	do {
		MPASS(prev != cur);
		delta = (int32_t)cur - (int32_t)prev;
		if (delta < 0)
			delta += ntxd;
		MPASS(delta > 0);
		DPRINTF(iflib_get_dev(sc->ctx),
		    "%s: cidx_processed=%u cur=%u clear=%d delta=%d\n",
		    __FUNCTION__, prev, cur, clear, delta);

		processed += delta;
		prev = cur;
		rs_cidx = (rs_cidx + 1) & (ntxd - 1);
		if (rs_cidx == txr->tx_rs_pidx)
			break;
		cur = txr->tx_rsq[rs_cidx];
		MPASS(cur != QIDX_INVALID);
		status = txr->tx_base[cur].upper.fields.status;
	} while ((status & E1000_TXD_STAT_DD));

	txr->tx_rs_cidx = rs_cidx;
	txr->tx_cidx_processed = prev;
	return (processed);
}

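/*
 * Hand receive buffers to the hardware by writing their physical
 * addresses into the legacy RX descriptor ring.
 */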
static void
lem_isc_rxd_refill(void *arg, if_rxd_update_t iru)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[iru->iru_qsidx];
	struct rx_ring *rxr = &que->rxr;
	struct e1000_rx_desc *rxd;
	uint64_t *paddrs;
	uint32_t next_pidx, pidx;
	uint16_t count;
	int i;

	paddrs = iru->iru_paddrs;
	pidx = iru->iru_pidx;
	count = iru->iru_count;

	for (i = 0, next_pidx = pidx; i < count; i++) {
		rxd = (struct e1000_rx_desc *)&rxr->rx_base[next_pidx];
		rxd->buffer_addr = htole64(paddrs[i]);
		/* status bits must be cleared */
		rxd->status = 0;

		if (++next_pidx == scctx->isc_nrxd[0])
			next_pidx = 0;
	}
}

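/*
 * Same as above, but for the extended descriptor format: the writeback
 * status is cleared so that a stale DD bit is never mistaken for a new
 * completion.
 */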
static void
em_isc_rxd_refill(void *arg, if_rxd_update_t iru)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	uint16_t rxqid = iru->iru_qsidx;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	union e1000_rx_desc_extended *rxd;
	uint64_t *paddrs;
	uint32_t next_pidx, pidx;
	uint16_t count;
	int i;

	paddrs = iru->iru_paddrs;
	pidx = iru->iru_pidx;
	count = iru->iru_count;

	for (i = 0, next_pidx = pidx; i < count; i++) {
		rxd = &rxr->rx_base[next_pidx];
		rxd->read.buffer_addr = htole64(paddrs[i]);
		/* DD bits must be cleared */
		rxd->wb.upper.status_error = 0;

		if (++next_pidx == scctx->isc_nrxd[0])
			next_pidx = 0;
	}
}

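/* Make the refilled buffers visible to the hardware via the RX tail. */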
static void
em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused,
    qidx_t pidx)
{
	struct e1000_softc *sc = arg;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;

	E1000_WRITE_REG(&sc->hw, E1000_RDT(rxr->me), pidx);
}

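/*
 * Count the complete packets (descriptors with EOP set) ready starting
 * at idx, stopping at the budget or at the first descriptor the hardware
 * has not yet marked done (DD).  Legacy descriptor variant.
 */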
static int
lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	struct e1000_rx_desc *rxd;
	uint32_t staterr = 0;
	int cnt, i;

	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
		rxd = (struct e1000_rx_desc *)&rxr->rx_base[i];
		staterr = rxd->status;

		if ((staterr & E1000_RXD_STAT_DD) == 0)
			break;
		if (++i == scctx->isc_nrxd[0])
			i = 0;
		if (staterr & E1000_RXD_STAT_EOP)
			cnt++;
	}
	return (cnt);
}

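/* Extended descriptor variant of the availability check above. */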
static int
em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	union e1000_rx_desc_extended *rxd;
	uint32_t staterr = 0;
	int cnt, i;

	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
		rxd = &rxr->rx_base[i];
		staterr = le32toh(rxd->wb.upper.status_error);

		if ((staterr & E1000_RXD_STAT_DD) == 0)
			break;
		if (++i == scctx->isc_nrxd[0])
			i = 0;
		if (staterr & E1000_RXD_STAT_EOP)
			cnt++;
	}
	return (cnt);
}

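/*
 * Gather the fragments of one received packet from the legacy ring into
 * ri->iri_frags, stopping at the EOP descriptor, and record checksum and
 * VLAN state for the stack.
 */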
static int
lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[ri->iri_qsidx];
	struct rx_ring *rxr = &que->rxr;
	struct e1000_rx_desc *rxd;
	uint16_t len;
	uint32_t status, errors;
	bool eop;
	int i, cidx;

	status = errors = i = 0;
	cidx = ri->iri_cidx;

	do {
		rxd = (struct e1000_rx_desc *)&rxr->rx_base[cidx];
		status = rxd->status;
		errors = rxd->errors;

		/* The descriptor must have been written back (DD set). */
		MPASS((status & E1000_RXD_STAT_DD) != 0);

		len = le16toh(rxd->length);
		ri->iri_len += len;

		eop = (status & E1000_RXD_STAT_EOP) != 0;

		/* Make sure bad packets are discarded */
		if (errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
			sc->dropped_pkts++;
			/* XXX fixup if common */
			return (EBADMSG);
		}

		ri->iri_frags[i].irf_flid = 0;
		ri->iri_frags[i].irf_idx = cidx;
		ri->iri_frags[i].irf_len = len;
		/* Zero out the receive descriptor's status. */
		rxd->status = 0;

		if (++cidx == scctx->isc_nrxd[0])
			cidx = 0;
		i++;
	} while (!eop);

	if (scctx->isc_capenable & IFCAP_RXCSUM)
		em_receive_checksum(status, errors, ri);

	if (scctx->isc_capenable & IFCAP_VLAN_HWTAGGING &&
	    status & E1000_RXD_STAT_VP) {
		ri->iri_vtag = le16toh(rxd->special & E1000_RXD_SPC_VLAN_MASK);
		ri->iri_flags |= M_VLANTAG;
	}

	ri->iri_nfrags = i;

	return (0);
}

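/*
 * Extended descriptor variant: additionally extracts the RSS hash and
 * hash type from the descriptor writeback for the stack's flow steering.
 */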
static int
em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[ri->iri_qsidx];
	struct rx_ring *rxr = &que->rxr;
	union e1000_rx_desc_extended *rxd;

	uint16_t len;
	uint32_t pkt_info;
	uint32_t staterr;
	bool eop;
	int i, cidx;

	staterr = i = 0;
	cidx = ri->iri_cidx;

	do {
		rxd = &rxr->rx_base[cidx];
		staterr = le32toh(rxd->wb.upper.status_error);
		pkt_info = le32toh(rxd->wb.lower.mrq);

		/* The descriptor must have been written back (DD set). */
		MPASS((staterr & E1000_RXD_STAT_DD) != 0);

		len = le16toh(rxd->wb.upper.length);
		ri->iri_len += len;

		eop = (staterr & E1000_RXD_STAT_EOP) != 0;

		/* Make sure bad packets are discarded */
		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
			sc->dropped_pkts++;
			return (EBADMSG);
		}

		ri->iri_frags[i].irf_flid = 0;
		ri->iri_frags[i].irf_idx = cidx;
		ri->iri_frags[i].irf_len = len;
		/* Zero out the receive descriptor's status. */
		rxd->wb.upper.status_error &= htole32(~0xFF);

		if (++cidx == scctx->isc_nrxd[0])
			cidx = 0;
		i++;
	} while (!eop);

	if (scctx->isc_capenable & IFCAP_RXCSUM)
		em_receive_checksum(staterr, staterr >> 24, ri);

	if (scctx->isc_capenable & IFCAP_VLAN_HWTAGGING &&
	    staterr & E1000_RXD_STAT_VP) {
		ri->iri_vtag = le16toh(rxd->wb.upper.vlan);
		ri->iri_flags |= M_VLANTAG;
	}

	ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss);
	ri->iri_rsstype = em_determine_rsstype(pkt_info);

	ri->iri_nfrags = i;
	return (0);
}


/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that it
 *  doesn't spend time verifying it again.
 *
 *********************************************************************/
static void
em_receive_checksum(uint16_t status, uint8_t errors, if_rxd_info_t ri)
{
	if (__predict_false(status & E1000_RXD_STAT_IXSM))
		return;

	/* If there is a layer 3 or 4 error we are done */
	if (__predict_false(errors & (E1000_RXD_ERR_IPE | E1000_RXD_ERR_TCPE)))
		return;

	/* IP Checksum Good */
	if (status & E1000_RXD_STAT_IPCS)
		ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);

	/* Valid L4 (TCP/UDP) checksum */
	if (__predict_true(status &
	    (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))) {
		ri->iri_csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
		ri->iri_csum_data = htons(0xffff);
	}
}

/********************************************************************
 *
 *  Parse the packet type to determine the appropriate hash
 *
 ******************************************************************/
static int
em_determine_rsstype(uint32_t pkt_info)
{
	switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
	case E1000_RXDADV_RSSTYPE_IPV4_TCP:
		return M_HASHTYPE_RSS_TCP_IPV4;
	case E1000_RXDADV_RSSTYPE_IPV4:
		return M_HASHTYPE_RSS_IPV4;
	case E1000_RXDADV_RSSTYPE_IPV6_TCP:
		return M_HASHTYPE_RSS_TCP_IPV6;
	case E1000_RXDADV_RSSTYPE_IPV6_EX:
		return M_HASHTYPE_RSS_IPV6_EX;
	case E1000_RXDADV_RSSTYPE_IPV6:
		return M_HASHTYPE_RSS_IPV6;
	case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
		return M_HASHTYPE_RSS_TCP_IPV6_EX;
	default:
		return M_HASHTYPE_OPAQUE;
	}
}