/*-
 * Copyright (c) 2016 Nicole Graziano <nicole@nextbsd.org>
 * Copyright (c) 2017 Matthew Macy <mmacy@mattmacy.io>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* $FreeBSD$ */
#include "if_em.h"

#ifdef RSS
#include <net/rss_config.h>
#include <netinet/in_rss.h>
#endif

#ifdef VERBOSE_DEBUG
#define DPRINTF device_printf
#else
#define DPRINTF(...)
#endif

/*********************************************************************
 *  Local Function prototypes
 *********************************************************************/
static int em_tso_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper,
    u32 *txd_lower);
static int em_transmit_checksum_setup(struct adapter *adapter, if_pkt_info_t pi,
    u32 *txd_upper, u32 *txd_lower);
static int em_isc_txd_encap(void *arg, if_pkt_info_t pi);
static void em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx);
static int em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear);
static void em_isc_rxd_refill(void *arg, if_rxd_update_t iru);
static void em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused,
    qidx_t pidx);
static int em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx,
    qidx_t budget);
static int em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);

static void lem_isc_rxd_refill(void *arg, if_rxd_update_t iru);

static int lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx,
    qidx_t budget);
static int lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);

static void lem_receive_checksum(int status, int errors, if_rxd_info_t ri);
static void em_receive_checksum(uint32_t status, if_rxd_info_t ri);
static int em_determine_rsstype(u32 pkt_info);
extern int em_intr(void *arg);

struct if_txrx em_txrx = {
	.ift_txd_encap = em_isc_txd_encap,
	.ift_txd_flush = em_isc_txd_flush,
	.ift_txd_credits_update = em_isc_txd_credits_update,
	.ift_rxd_available = em_isc_rxd_available,
	.ift_rxd_pkt_get = em_isc_rxd_pkt_get,
	.ift_rxd_refill = em_isc_rxd_refill,
	.ift_rxd_flush = em_isc_rxd_flush,
	.ift_legacy_intr = em_intr
};

struct if_txrx lem_txrx = {
	.ift_txd_encap = em_isc_txd_encap,
	.ift_txd_flush = em_isc_txd_flush,
	.ift_txd_credits_update = em_isc_txd_credits_update,
	.ift_rxd_available = lem_isc_rxd_available,
	.ift_rxd_pkt_get = lem_isc_rxd_pkt_get,
	.ift_rxd_refill = lem_isc_rxd_refill,
	.ift_rxd_flush = em_isc_rxd_flush,
	.ift_legacy_intr = em_intr
};

extern if_shared_ctx_t em_sctx;

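/*********************************************************************
 *  em_dump_rs - debug helper: print the state of each TX queue's
 *  report-status (RS) tracking ring and which descriptors currently
 *  have the Descriptor Done (DD) bit set.
 *********************************************************************/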
void
em_dump_rs(struct adapter *adapter)
{
	if_softc_ctx_t scctx = adapter->shared;
	struct em_tx_queue *que;
	struct tx_ring *txr;
	qidx_t i, ntxd, qid, cur;
	int16_t rs_cidx;
	uint8_t status;

	printf("\n");
	ntxd = scctx->isc_ntxd[0];
	for (qid = 0; qid < adapter->tx_num_queues; qid++) {
		que = &adapter->tx_queues[qid];
		txr = &que->txr;
		rs_cidx = txr->tx_rs_cidx;
		if (rs_cidx != txr->tx_rs_pidx) {
			cur = txr->tx_rsq[rs_cidx];
			status = txr->tx_base[cur].upper.fields.status;
			if (!(status & E1000_TXD_STAT_DD))
				printf("qid[%d]->tx_rsq[%d]: %d clear ", qid, rs_cidx, cur);
		} else {
			rs_cidx = (rs_cidx-1) & (ntxd-1);
			cur = txr->tx_rsq[rs_cidx];
			printf("qid[%d]->tx_rsq[rs_cidx-1=%d]: %d  ", qid, rs_cidx, cur);
		}
		printf("cidx_prev=%d rs_pidx=%d ", txr->tx_cidx_processed, txr->tx_rs_pidx);
		for (i = 0; i < ntxd; i++) {
			if (txr->tx_base[i].upper.fields.status & E1000_TXD_STAT_DD)
				printf("%d set ", i);
		}
		printf("\n");
	}
}

/**********************************************************************
 *
 *  Setup work for hardware segmentation offload (TSO) on
 *  adapters using advanced tx descriptors
 *
 **********************************************************************/
static int
em_tso_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower)
{
	if_softc_ctx_t scctx = adapter->shared;
	struct em_tx_queue *que = &adapter->tx_queues[pi->ipi_qsidx];
	struct tx_ring *txr = &que->txr;
	struct e1000_context_desc *TXD;
	int cur, hdr_len;

	hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
		      E1000_TXD_DTYP_D |	/* Data descr type */
		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */

	/* IP and/or TCP header checksum calculation and insertion. */
	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;

	cur = pi->ipi_pidx;
	TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
	/*
	 * Start offset for header checksum calculation.
	 * End offset for header checksum calculation.
	 * Offset of the place to put the checksum.
	 */
	TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen;
	TXD->lower_setup.ip_fields.ipcse =
	    htole16(pi->ipi_ehdrlen + pi->ipi_ip_hlen - 1);
	TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen + offsetof(struct ip, ip_sum);

	/*
	 * Start offset for payload checksum calculation.
	 * End offset for payload checksum calculation.
	 * Offset of place to put the checksum.
	 */
	TXD->upper_setup.tcp_fields.tucss = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
	TXD->upper_setup.tcp_fields.tucse = 0;
	TXD->upper_setup.tcp_fields.tucso =
	    pi->ipi_ehdrlen + pi->ipi_ip_hlen + offsetof(struct tcphdr, th_sum);

	/*
	 * Payload size per packet w/o any headers.
	 * Length of all headers up to payload.
	 */
	TXD->tcp_seg_setup.fields.mss = htole16(pi->ipi_tso_segsz);
	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;

	TXD->cmd_and_length = htole32(adapter->txd_cmd |
				E1000_TXD_CMD_DEXT |	/* Extended descr */
				E1000_TXD_CMD_TSE |	/* TSE context */
				E1000_TXD_CMD_IP |	/* Do IP csum */
				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
				(pi->ipi_len - hdr_len)); /* Total len */
	txr->tx_tso = TRUE;

	if (++cur == scctx->isc_ntxd[0]) {
		cur = 0;
	}
	DPRINTF(iflib_get_dev(adapter->ctx), "%s: pidx: %d cur: %d\n", __FUNCTION__, pi->ipi_pidx, cur);
	return (cur);
}

#define TSO_WORKAROUND 4
#define DONT_FORCE_CTX 1

/*********************************************************************
 *  The offload context is protocol specific (TCP/UDP) and thus
 *  only needs to be set when the protocol changes. Even so, a
 *  context change can be a performance detriment, and it might be
 *  better to avoid it entirely. The reason lies in the way the
 *  controller pipelines requests from the Tx data DMA: up to four
 *  requests can be pipelined, and they may belong to the same
 *  packet or to multiple packets. However, all requests for one
 *  packet are issued before any request for a subsequent packet,
 *  and if a request for the next packet requires a context change,
 *  that request is stalled until the previous one completes.
 *  Setting up a new context therefore effectively disables
 *  pipelined Tx data DMA, which in turn greatly slows down
 *  performance when sending small frames.
 **********************************************************************/

static int
em_transmit_checksum_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower)
{
	struct e1000_context_desc *TXD = NULL;
	if_softc_ctx_t scctx = adapter->shared;
	struct em_tx_queue *que = &adapter->tx_queues[pi->ipi_qsidx];
	struct tx_ring *txr = &que->txr;
	int csum_flags = pi->ipi_csum_flags;
	int cur, hdr_len;
	u32 cmd;

	cur = pi->ipi_pidx;
	hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
	cmd = adapter->txd_cmd;

	/*
	 * The 82574L can only remember the *last* context used
	 * regardless of the queue it was used for.  We cannot reuse
	 * contexts on this hardware platform and must generate a new
	 * context every time.  82574L hardware spec, section 7.2.6,
	 * second note.
	 */
	if (DONT_FORCE_CTX &&
	    adapter->tx_num_queues == 1 &&
	    txr->csum_lhlen == pi->ipi_ehdrlen &&
	    txr->csum_iphlen == pi->ipi_ip_hlen &&
	    txr->csum_flags == csum_flags) {
		/*
		 * Same csum offload context as the previous packets;
		 * just return.
		 */
		*txd_upper = txr->csum_txd_upper;
		*txd_lower = txr->csum_txd_lower;
		return (cur);
	}

	TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
	if (csum_flags & CSUM_IP) {
		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
		/*
		 * Start offset for header checksum calculation.
		 * End offset for header checksum calculation.
		 * Offset of place to put the checksum.
		 */
		TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen;
		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
		TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen + offsetof(struct ip, ip_sum);
		cmd |= E1000_TXD_CMD_IP;
	}

	if (csum_flags & (CSUM_TCP|CSUM_UDP)) {
		uint8_t tucso;

		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;

		if (csum_flags & CSUM_TCP) {
			tucso = hdr_len + offsetof(struct tcphdr, th_sum);
			cmd |= E1000_TXD_CMD_TCP;
		} else
			tucso = hdr_len + offsetof(struct udphdr, uh_sum);
		TXD->upper_setup.tcp_fields.tucss = hdr_len;
		TXD->upper_setup.tcp_fields.tucse = htole16(0);
		TXD->upper_setup.tcp_fields.tucso = tucso;
	}

	txr->csum_lhlen = pi->ipi_ehdrlen;
	txr->csum_iphlen = pi->ipi_ip_hlen;
	txr->csum_flags = csum_flags;
	txr->csum_txd_upper = *txd_upper;
	txr->csum_txd_lower = *txd_lower;

	TXD->tcp_seg_setup.data = htole32(0);
	TXD->cmd_and_length =
		htole32(E1000_TXD_CMD_IFCS | E1000_TXD_CMD_DEXT | cmd);

	if (++cur == scctx->isc_ntxd[0]) {
		cur = 0;
	}
	DPRINTF(iflib_get_dev(adapter->ctx), "checksum_setup csum_flags=%x txd_upper=%x txd_lower=%x hdr_len=%d cmd=%x\n",
		      csum_flags, *txd_upper, *txd_lower, hdr_len, cmd);
	return (cur);
}

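/*********************************************************************
 *  em_isc_txd_encap
 *
 *  iflib TX encapsulation: set up the checksum/TSO offload context
 *  as needed, write one legacy TX descriptor per DMA segment of the
 *  packet, and record the last descriptor for report-status
 *  tracking.
 *********************************************************************/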
static int
em_isc_txd_encap(void *arg, if_pkt_info_t pi)
{
	struct adapter *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
	struct tx_ring *txr = &que->txr;
	bus_dma_segment_t *segs = pi->ipi_segs;
	int nsegs = pi->ipi_nsegs;
	int csum_flags = pi->ipi_csum_flags;
	int i, j, first, pidx_last;
	u32 txd_flags, txd_upper = 0, txd_lower = 0;

	struct e1000_tx_desc *ctxd = NULL;
	bool do_tso, tso_desc;
	qidx_t ntxd;

	txd_flags = pi->ipi_flags & IPI_TX_INTR ? E1000_TXD_CMD_RS : 0;
	i = first = pi->ipi_pidx;
	do_tso = (csum_flags & CSUM_TSO);
	tso_desc = FALSE;
	ntxd = scctx->isc_ntxd[0];
	/*
	 * TSO Hardware workaround, if this packet is not
	 * TSO, and is only a single descriptor long, and
	 * it follows a TSO burst, then we need to add a
	 * sentinel descriptor to prevent premature writeback.
	 */
	if ((!do_tso) && (txr->tx_tso == TRUE)) {
		if (nsegs == 1)
			tso_desc = TRUE;
		txr->tx_tso = FALSE;
	}

	/* Do hardware assists */
	if (do_tso) {
		i = em_tso_setup(sc, pi, &txd_upper, &txd_lower);
		tso_desc = TRUE;
	} else if (csum_flags & EM_CSUM_OFFLOAD) {
		i = em_transmit_checksum_setup(sc, pi, &txd_upper, &txd_lower);
	}

	if (pi->ipi_mflags & M_VLANTAG) {
		/* Set the vlan id. */
		txd_upper |= htole16(pi->ipi_vtag) << 16;
		/* Tell hardware to add tag */
		txd_lower |= htole32(E1000_TXD_CMD_VLE);
	}

	DPRINTF(iflib_get_dev(sc->ctx), "encap: set up tx: nsegs=%d first=%d i=%d\n", nsegs, first, i);
	/* XXX adapter->pcix_82544 -- lem_fill_descriptors */

	/* Set up our transmit descriptors */
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;
		uint32_t cmd;

		ctxd = &txr->tx_base[i];
		seg_addr = segs[j].ds_addr;
		seg_len = segs[j].ds_len;
		cmd = E1000_TXD_CMD_IFCS | sc->txd_cmd;

		/*
		 * TSO Workaround:
		 * If this is the last descriptor, we want to
		 * split it so we have a small final sentinel
		 */
		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
			seg_len -= TSO_WORKAROUND;
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(cmd | txd_lower | seg_len);
			ctxd->upper.data = htole32(txd_upper);

			if (++i == scctx->isc_ntxd[0])
				i = 0;

			/* Now make the sentinel */
			ctxd = &txr->tx_base[i];
			ctxd->buffer_addr = htole64(seg_addr + seg_len);
			ctxd->lower.data = htole32(cmd | txd_lower | TSO_WORKAROUND);
			ctxd->upper.data = htole32(txd_upper);
			pidx_last = i;
			if (++i == scctx->isc_ntxd[0])
				i = 0;
			DPRINTF(iflib_get_dev(sc->ctx), "TSO path pidx_last=%d i=%d ntxd[0]=%d\n", pidx_last, i, scctx->isc_ntxd[0]);
		} else {
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(cmd | txd_lower | seg_len);
			ctxd->upper.data = htole32(txd_upper);
			pidx_last = i;
			if (++i == scctx->isc_ntxd[0])
				i = 0;
			DPRINTF(iflib_get_dev(sc->ctx), "pidx_last=%d i=%d ntxd[0]=%d\n", pidx_last, i, scctx->isc_ntxd[0]);
		}
	}

	/*
	 * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
	 */
	if (txd_flags && nsegs) {
		txr->tx_rsq[txr->tx_rs_pidx] = pidx_last;
		DPRINTF(iflib_get_dev(sc->ctx), "setting to RS on %d rs_pidx %d first: %d\n", pidx_last, txr->tx_rs_pidx, first);
		txr->tx_rs_pidx = (txr->tx_rs_pidx+1) & (ntxd-1);
		MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx);
	}
	ctxd->lower.data |= htole32(E1000_TXD_CMD_EOP | txd_flags);
	DPRINTF(iflib_get_dev(sc->ctx), "tx_buffers[%d]->eop = %d ipi_new_pidx=%d\n", first, pidx_last, i);
	pi->ipi_new_pidx = i;

	return (0);
}

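/*********************************************************************
 *  em_isc_txd_flush
 *
 *  Advance the TX tail register (TDT) so the hardware picks up the
 *  newly queued descriptors.
 *********************************************************************/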
static void
em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx)
{
	struct adapter *adapter = arg;
	struct em_tx_queue *que = &adapter->tx_queues[txqid];
	struct tx_ring *txr = &que->txr;

	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), pidx);
}

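/*********************************************************************
 *  em_isc_txd_credits_update
 *
 *  Walk the report-status queue and count descriptors the hardware
 *  has marked done (DD).  If 'clear' is false, only report whether
 *  any completions are pending; otherwise return the number of
 *  descriptors iflib may reclaim.
 *********************************************************************/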
static int
em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear)
{
	struct adapter *adapter = arg;
	if_softc_ctx_t scctx = adapter->shared;
	struct em_tx_queue *que = &adapter->tx_queues[txqid];
	struct tx_ring *txr = &que->txr;

	qidx_t processed = 0;
	int updated;
	qidx_t cur, prev, ntxd, rs_cidx;
	int32_t delta;
	uint8_t status;

	rs_cidx = txr->tx_rs_cidx;
	if (rs_cidx == txr->tx_rs_pidx)
		return (0);
	cur = txr->tx_rsq[rs_cidx];
	MPASS(cur != QIDX_INVALID);
	status = txr->tx_base[cur].upper.fields.status;
	updated = !!(status & E1000_TXD_STAT_DD);

	if (!updated)
		return (0);

	/*
	 * If clear is false, just let the caller know that there
	 * are descriptors to reclaim.
	 */
	if (!clear)
		return (1);

	prev = txr->tx_cidx_processed;
	ntxd = scctx->isc_ntxd[0];
	do {
		MPASS(prev != cur);
		delta = (int32_t)cur - (int32_t)prev;
		if (delta < 0)
			delta += ntxd;
		MPASS(delta > 0);
		DPRINTF(iflib_get_dev(adapter->ctx),
		    "%s: cidx_processed=%u cur=%u clear=%d delta=%d\n",
		    __FUNCTION__, prev, cur, clear, delta);

		processed += delta;
		prev = cur;
		rs_cidx = (rs_cidx + 1) & (ntxd-1);
		if (rs_cidx == txr->tx_rs_pidx)
			break;
		cur = txr->tx_rsq[rs_cidx];
		MPASS(cur != QIDX_INVALID);
		status = txr->tx_base[cur].upper.fields.status;
	} while ((status & E1000_TXD_STAT_DD));

	txr->tx_rs_cidx = rs_cidx;
	txr->tx_cidx_processed = prev;
	return (processed);
}

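/*********************************************************************
 *  lem_isc_rxd_refill
 *
 *  Post fresh receive buffers using the legacy (lem) RX descriptor
 *  format: write each buffer's physical address into the ring and
 *  clear the descriptor status.
 *********************************************************************/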
static void
lem_isc_rxd_refill(void *arg, if_rxd_update_t iru)
{
	struct adapter *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[iru->iru_qsidx];
	struct rx_ring *rxr = &que->rxr;
	struct e1000_rx_desc *rxd;
	uint64_t *paddrs;
	uint32_t next_pidx, pidx;
	uint16_t count;
	int i;

	paddrs = iru->iru_paddrs;
	pidx = iru->iru_pidx;
	count = iru->iru_count;

	for (i = 0, next_pidx = pidx; i < count; i++) {
		rxd = (struct e1000_rx_desc *)&rxr->rx_base[next_pidx];
		rxd->buffer_addr = htole64(paddrs[i]);
		/* status bits must be cleared */
		rxd->status = 0;

		if (++next_pidx == scctx->isc_nrxd[0])
			next_pidx = 0;
	}
}

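/*********************************************************************
 *  em_isc_rxd_refill
 *
 *  Post fresh receive buffers using the extended RX descriptor
 *  format: write each buffer's physical address and clear the
 *  writeback status so stale DD bits are not seen.
 *********************************************************************/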
static void
em_isc_rxd_refill(void *arg, if_rxd_update_t iru)
{
	struct adapter *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	uint16_t rxqid = iru->iru_qsidx;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	union e1000_rx_desc_extended *rxd;
	uint64_t *paddrs;
	uint32_t next_pidx, pidx;
	uint16_t count;
	int i;

	paddrs = iru->iru_paddrs;
	pidx = iru->iru_pidx;
	count = iru->iru_count;

	for (i = 0, next_pidx = pidx; i < count; i++) {
		rxd = &rxr->rx_base[next_pidx];
		rxd->read.buffer_addr = htole64(paddrs[i]);
		/* DD bits must be cleared */
		rxd->wb.upper.status_error = 0;

		if (++next_pidx == scctx->isc_nrxd[0])
			next_pidx = 0;
	}
}

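/*********************************************************************
 *  em_isc_rxd_flush
 *
 *  Update the RX tail register (RDT) to hand the refilled
 *  descriptors back to the hardware.
 *********************************************************************/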
static void
em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx)
{
	struct adapter *sc = arg;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;

	E1000_WRITE_REG(&sc->hw, E1000_RDT(rxr->me), pidx);
}

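/*********************************************************************
 *  lem_isc_rxd_available
 *
 *  Scan the legacy RX ring starting at 'idx' and report how many
 *  completed packets (DD set, counted at EOP) are ready, up to
 *  'budget'.
 *********************************************************************/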
static int
lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
{
	struct adapter *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	struct e1000_rx_desc *rxd;
	u32 staterr = 0;
	int cnt, i;

	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
		rxd = (struct e1000_rx_desc *)&rxr->rx_base[i];
		staterr = rxd->status;

		if ((staterr & E1000_RXD_STAT_DD) == 0)
			break;
		if (++i == scctx->isc_nrxd[0])
			i = 0;
		if (staterr & E1000_RXD_STAT_EOP)
			cnt++;
	}
	return (cnt);
}

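/*********************************************************************
 *  em_isc_rxd_available
 *
 *  Same as above for the extended RX descriptor format: count the
 *  completed packets ready starting at 'idx', up to 'budget'.
 *********************************************************************/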
static int
em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
{
	struct adapter *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	union e1000_rx_desc_extended *rxd;
	u32 staterr = 0;
	int cnt, i;

	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
		rxd = &rxr->rx_base[i];
		staterr = le32toh(rxd->wb.upper.status_error);

		if ((staterr & E1000_RXD_STAT_DD) == 0)
			break;
		if (++i == scctx->isc_nrxd[0])
			i = 0;
		if (staterr & E1000_RXD_STAT_EOP)
			cnt++;
	}
	return (cnt);
}

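/*********************************************************************
 *  lem_isc_rxd_pkt_get
 *
 *  Gather the fragments of one received packet from the legacy RX
 *  ring into the if_rxd_info structure, dropping frames the
 *  hardware flagged with errors and recording checksum and VLAN
 *  information for the stack.
 *********************************************************************/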
static int
lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
{
	struct adapter *adapter = arg;
	if_softc_ctx_t scctx = adapter->shared;
	struct em_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx];
	struct rx_ring *rxr = &que->rxr;
	struct e1000_rx_desc *rxd;
	u16 len;
	u32 status, errors;
	bool eop;
	int i, cidx;

	status = errors = i = 0;
	cidx = ri->iri_cidx;

	do {
		rxd = (struct e1000_rx_desc *)&rxr->rx_base[cidx];
		status = rxd->status;
		errors = rxd->errors;

		/* Error Checking then decrement count */
		MPASS((status & E1000_RXD_STAT_DD) != 0);

		len = le16toh(rxd->length);
		ri->iri_len += len;

		eop = (status & E1000_RXD_STAT_EOP) != 0;

		/* Make sure bad packets are discarded */
		if (errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
			adapter->dropped_pkts++;
			/* XXX fixup if common */
			return (EBADMSG);
		}

		ri->iri_frags[i].irf_flid = 0;
		ri->iri_frags[i].irf_idx = cidx;
		ri->iri_frags[i].irf_len = len;
		/* Zero out the receive descriptor's status. */
		rxd->status = 0;

		if (++cidx == scctx->isc_nrxd[0])
			cidx = 0;
		i++;
	} while (!eop);

	/* XXX add a faster way to look this up */
	if (adapter->hw.mac.type >= e1000_82543 && !(status & E1000_RXD_STAT_IXSM))
		lem_receive_checksum(status, errors, ri);

	if (status & E1000_RXD_STAT_VP) {
		ri->iri_vtag = le16toh(rxd->special);
		ri->iri_flags |= M_VLANTAG;
	}

	ri->iri_nfrags = i;

	return (0);
}

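/*********************************************************************
 *  em_isc_rxd_pkt_get
 *
 *  Same as above for the extended RX descriptor format, additionally
 *  extracting the RSS hash and hash type reported by the hardware.
 *********************************************************************/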
static int
em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
{
	struct adapter *adapter = arg;
	if_softc_ctx_t scctx = adapter->shared;
	struct em_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx];
	struct rx_ring *rxr = &que->rxr;
	union e1000_rx_desc_extended *rxd;

	u16 len;
	u32 pkt_info;
	u32 staterr = 0;
	bool eop;
	int i, cidx, vtag;

	i = vtag = 0;
	cidx = ri->iri_cidx;

	do {
		rxd = &rxr->rx_base[cidx];
		staterr = le32toh(rxd->wb.upper.status_error);
		pkt_info = le32toh(rxd->wb.lower.mrq);

		/* Error Checking then decrement count */
		MPASS((staterr & E1000_RXD_STAT_DD) != 0);

		len = le16toh(rxd->wb.upper.length);
		ri->iri_len += len;

		eop = (staterr & E1000_RXD_STAT_EOP) != 0;

		/* Make sure bad packets are discarded */
		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
			adapter->dropped_pkts++;
			return (EBADMSG);
		}

		ri->iri_frags[i].irf_flid = 0;
		ri->iri_frags[i].irf_idx = cidx;
		ri->iri_frags[i].irf_len = len;
		/* Zero out the receive descriptor's status. */
		rxd->wb.upper.status_error &= htole32(~0xFF);

		if (++cidx == scctx->isc_nrxd[0])
			cidx = 0;
		i++;
	} while (!eop);

	/* XXX add a faster way to look this up */
	if (adapter->hw.mac.type >= e1000_82543)
		em_receive_checksum(staterr, ri);

	if (staterr & E1000_RXD_STAT_VP) {
		vtag = le16toh(rxd->wb.upper.vlan);
	}

	ri->iri_vtag = vtag;
	if (vtag)
		ri->iri_flags |= M_VLANTAG;

	ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss);
	ri->iri_rsstype = em_determine_rsstype(pkt_info);

	ri->iri_nfrags = i;
	return (0);
}

/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that the
 *  stack doesn't spend time verifying it.
 *
 *********************************************************************/
static void
lem_receive_checksum(int status, int errors, if_rxd_info_t ri)
{
	/* Did it pass? */
	if (status & E1000_RXD_STAT_IPCS && !(errors & E1000_RXD_ERR_IPE))
		ri->iri_csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);

	if (status & E1000_RXD_STAT_TCPCS) {
		/* Did it pass? */
		if (!(errors & E1000_RXD_ERR_TCPE)) {
			ri->iri_csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
			ri->iri_csum_data = htons(0xffff);
		}
	}
}

/*********************************************************************
 *
 *  Parse the packet type to determine the appropriate hash
 *
 *********************************************************************/
static int
em_determine_rsstype(u32 pkt_info)
{
	switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
	case E1000_RXDADV_RSSTYPE_IPV4_TCP:
		return M_HASHTYPE_RSS_TCP_IPV4;
	case E1000_RXDADV_RSSTYPE_IPV4:
		return M_HASHTYPE_RSS_IPV4;
	case E1000_RXDADV_RSSTYPE_IPV6_TCP:
		return M_HASHTYPE_RSS_TCP_IPV6;
	case E1000_RXDADV_RSSTYPE_IPV6_EX:
		return M_HASHTYPE_RSS_IPV6_EX;
	case E1000_RXDADV_RSSTYPE_IPV6:
		return M_HASHTYPE_RSS_IPV6;
	case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
		return M_HASHTYPE_RSS_TCP_IPV6_EX;
	default:
		return M_HASHTYPE_OPAQUE;
	}
}

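/*********************************************************************
 *
 *  Report the hardware checksum status for extended RX descriptors:
 *  mark the IP header checksum and the TCP/UDP checksum as verified
 *  when the hardware checked them and flagged no error.
 *
 *********************************************************************/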
static void
em_receive_checksum(uint32_t status, if_rxd_info_t ri)
{
	ri->iri_csum_flags = 0;

	/* Ignore Checksum bit is set */
	if (status & E1000_RXD_STAT_IXSM)
		return;

	/* If the IP checksum exists and there is no IP Checksum error */
	if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
	    E1000_RXD_STAT_IPCS) {
		ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
	}

	/* TCP or UDP checksum */
	if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
	    E1000_RXD_STAT_TCPCS) {
		ri->iri_csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		ri->iri_csum_data = htons(0xffff);
	}
	if (status & E1000_RXD_STAT_UDPCS) {
		ri->iri_csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		ri->iri_csum_data = htons(0xffff);
	}
}