/*-
 * Copyright (c) 2016 Nicole Graziano <nicole@nextbsd.org>
 * Copyright (c) 2017 Matthew Macy <mmacy@mattmacy.io>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* $FreeBSD$ */
#include "if_em.h"

#ifdef RSS
#include <net/rss_config.h>
#include <netinet/in_rss.h>
#endif

#ifdef VERBOSE_DEBUG
#define DPRINTF device_printf
#else
#define DPRINTF(...)
#endif

/*********************************************************************
 *  Local Function prototypes
 *********************************************************************/
static int em_tso_setup(struct e1000_softc *sc, if_pkt_info_t pi, u32 *txd_upper,
    u32 *txd_lower);
static int em_transmit_checksum_setup(struct e1000_softc *sc, if_pkt_info_t pi,
    u32 *txd_upper, u32 *txd_lower);
static int em_isc_txd_encap(void *arg, if_pkt_info_t pi);
static void em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx);
static int em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear);
static void em_isc_rxd_refill(void *arg, if_rxd_update_t iru);
static void em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused,
    qidx_t pidx);
static int em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx,
    qidx_t budget);
static int em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);

static void lem_isc_rxd_refill(void *arg, if_rxd_update_t iru);

static int lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx,
    qidx_t budget);
static int lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);

static void em_receive_checksum(uint16_t, uint8_t, if_rxd_info_t);
static int em_determine_rsstype(u32 pkt_info);
extern int em_intr(void *arg);

struct if_txrx em_txrx = {
	.ift_txd_encap = em_isc_txd_encap,
	.ift_txd_flush = em_isc_txd_flush,
	.ift_txd_credits_update = em_isc_txd_credits_update,
	.ift_rxd_available = em_isc_rxd_available,
	.ift_rxd_pkt_get = em_isc_rxd_pkt_get,
	.ift_rxd_refill = em_isc_rxd_refill,
	.ift_rxd_flush = em_isc_rxd_flush,
	.ift_legacy_intr = em_intr
};

struct if_txrx lem_txrx = {
	.ift_txd_encap = em_isc_txd_encap,
	.ift_txd_flush = em_isc_txd_flush,
	.ift_txd_credits_update = em_isc_txd_credits_update,
	.ift_rxd_available = lem_isc_rxd_available,
	.ift_rxd_pkt_get = lem_isc_rxd_pkt_get,
	.ift_rxd_refill = lem_isc_rxd_refill,
	.ift_rxd_flush = em_isc_rxd_flush,
	.ift_legacy_intr = em_intr
};

extern if_shared_ctx_t em_sctx;
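
/*********************************************************************
 *
 *  Debug helper: print the report-status (RS) bookkeeping of every
 *  transmit queue and list the descriptors whose DD (descriptor
 *  done) status bit is currently set.
 *
 *********************************************************************/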
void
em_dump_rs(struct e1000_softc *sc)
{
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que;
	struct tx_ring *txr;
	qidx_t i, ntxd, qid, cur;
	int16_t rs_cidx;
	uint8_t status;

	printf("\n");
	ntxd = scctx->isc_ntxd[0];
	for (qid = 0; qid < sc->tx_num_queues; qid++) {
		que = &sc->tx_queues[qid];
		txr = &que->txr;
		rs_cidx = txr->tx_rs_cidx;
		if (rs_cidx != txr->tx_rs_pidx) {
			cur = txr->tx_rsq[rs_cidx];
			status = txr->tx_base[cur].upper.fields.status;
			if (!(status & E1000_TXD_STAT_DD))
				printf("qid[%d]->tx_rsq[%d]: %d clear ",
				    qid, rs_cidx, cur);
		} else {
			rs_cidx = (rs_cidx - 1) & (ntxd - 1);
			cur = txr->tx_rsq[rs_cidx];
			printf("qid[%d]->tx_rsq[rs_cidx-1=%d]: %d  ",
			    qid, rs_cidx, cur);
		}
		printf("cidx_prev=%d rs_pidx=%d ", txr->tx_cidx_processed,
		    txr->tx_rs_pidx);
		for (i = 0; i < ntxd; i++) {
			if (txr->tx_base[i].upper.fields.status &
			    E1000_TXD_STAT_DD)
				printf("%d set ", i);
		}
		printf("\n");
	}
}

/**********************************************************************
 *
 *  Setup work for hardware segmentation offload (TSO) on
 *  adapters using advanced tx descriptors
 *
 **********************************************************************/
static int
em_tso_setup(struct e1000_softc *sc, if_pkt_info_t pi, u32 *txd_upper,
    u32 *txd_lower)
{
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
	struct tx_ring *txr = &que->txr;
	struct e1000_context_desc *TXD;
	int cur, hdr_len;

	hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
		      E1000_TXD_DTYP_D |	/* Data descr type */
		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */

	/* IP and/or TCP header checksum calculation and insertion. */
	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;

	cur = pi->ipi_pidx;
	TXD = (struct e1000_context_desc *)&txr->tx_base[cur];

	/*
	 * Start offset for header checksum calculation.
	 * End offset for header checksum calculation.
	 * Offset of place to put the checksum.
	 */
	TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen;
	TXD->lower_setup.ip_fields.ipcse =
	    htole16(pi->ipi_ehdrlen + pi->ipi_ip_hlen - 1);
	TXD->lower_setup.ip_fields.ipcso =
	    pi->ipi_ehdrlen + offsetof(struct ip, ip_sum);

	/*
	 * Start offset for payload checksum calculation.
	 * End offset for payload checksum calculation.
	 * Offset of place to put the checksum.
	 */
	TXD->upper_setup.tcp_fields.tucss = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
	TXD->upper_setup.tcp_fields.tucse = 0;
	TXD->upper_setup.tcp_fields.tucso =
	    pi->ipi_ehdrlen + pi->ipi_ip_hlen + offsetof(struct tcphdr, th_sum);

	/*
	 * Payload size per packet w/o any headers.
	 * Length of all headers up to payload.
	 */
	TXD->tcp_seg_setup.fields.mss = htole16(pi->ipi_tso_segsz);
	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;

	TXD->cmd_and_length = htole32(sc->txd_cmd |
				E1000_TXD_CMD_DEXT |	/* Extended descr */
				E1000_TXD_CMD_TSE |	/* TSE context */
				E1000_TXD_CMD_IP |	/* Do IP csum */
				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
				(pi->ipi_len - hdr_len)); /* Total len */
	txr->tx_tso = true;

	if (++cur == scctx->isc_ntxd[0]) {
		cur = 0;
	}
	DPRINTF(iflib_get_dev(sc->ctx), "%s: pidx: %d cur: %d\n",
	    __FUNCTION__, pi->ipi_pidx, cur);
	return (cur);
}

#define TSO_WORKAROUND 4
#define DONT_FORCE_CTX 1

/*********************************************************************
 *  The offload context is protocol specific (TCP/UDP) and thus
 *  only needs to be set when the protocol changes. Setting up a
 *  new context can be a performance detriment, however, and it
 *  might be better just to disable it. The reason arises from the
 *  way in which the controller supports pipelined requests from
 *  the Tx data DMA. Up to four requests can be pipelined, and they
 *  may belong to the same packet or to multiple packets. However,
 *  all requests for one packet are issued before a request is
 *  issued for a subsequent packet, and if a request for the next
 *  packet requires a context change, that request will be stalled
 *  until the previous request completes. This means that setting
 *  up a new context effectively disables pipelined Tx data DMA,
 *  which in turn greatly slows down performance when sending
 *  small frames.
 **********************************************************************/
static int
em_transmit_checksum_setup(struct e1000_softc *sc, if_pkt_info_t pi,
    u32 *txd_upper, u32 *txd_lower)
{
	struct e1000_context_desc *TXD = NULL;
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
	struct tx_ring *txr = &que->txr;
	int csum_flags = pi->ipi_csum_flags;
	int cur, hdr_len;
	u32 cmd;

	cur = pi->ipi_pidx;
	hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
	cmd = sc->txd_cmd;

	/*
	 * The 82574L can only remember the *last* context used
	 * regardless of the queue it was used for.  We cannot reuse
	 * contexts on this hardware platform and must generate a new
	 * context every time.  82574L hardware spec, section 7.2.6,
	 * second note.
	 */
	if (DONT_FORCE_CTX &&
	    sc->tx_num_queues == 1 &&
	    txr->csum_lhlen == pi->ipi_ehdrlen &&
	    txr->csum_iphlen == pi->ipi_ip_hlen &&
	    txr->csum_flags == csum_flags) {
		/*
		 * Same csum offload context as the previous packets;
		 * just return.
		 */
		*txd_upper = txr->csum_txd_upper;
		*txd_lower = txr->csum_txd_lower;
		return (cur);
	}

	TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
	if (csum_flags & CSUM_IP) {
		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
		/*
		 * Start offset for header checksum calculation.
		 * End offset for header checksum calculation.
		 * Offset of place to put the checksum.
		 */
		TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen;
		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
		TXD->lower_setup.ip_fields.ipcso =
		    pi->ipi_ehdrlen + offsetof(struct ip, ip_sum);
		cmd |= E1000_TXD_CMD_IP;
	}

	if (csum_flags & (CSUM_TCP|CSUM_UDP)) {
		uint8_t tucso;

		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;

		if (csum_flags & CSUM_TCP) {
			tucso = hdr_len + offsetof(struct tcphdr, th_sum);
			cmd |= E1000_TXD_CMD_TCP;
		} else
			tucso = hdr_len + offsetof(struct udphdr, uh_sum);
		TXD->upper_setup.tcp_fields.tucss = hdr_len;
		TXD->upper_setup.tcp_fields.tucse = htole16(0);
		TXD->upper_setup.tcp_fields.tucso = tucso;
	}

	txr->csum_lhlen = pi->ipi_ehdrlen;
	txr->csum_iphlen = pi->ipi_ip_hlen;
	txr->csum_flags = csum_flags;
	txr->csum_txd_upper = *txd_upper;
	txr->csum_txd_lower = *txd_lower;

	TXD->tcp_seg_setup.data = htole32(0);
	TXD->cmd_and_length =
		htole32(E1000_TXD_CMD_IFCS | E1000_TXD_CMD_DEXT | cmd);

	if (++cur == scctx->isc_ntxd[0]) {
		cur = 0;
	}
	DPRINTF(iflib_get_dev(sc->ctx),
	    "checksum_setup csum_flags=%x txd_upper=%x txd_lower=%x hdr_len=%d cmd=%x\n",
	    csum_flags, *txd_upper, *txd_lower, hdr_len, cmd);
	return (cur);
}
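
/*********************************************************************
 *
 *  iflib txd_encap handler: write the transmit descriptors for one
 *  packet, performing TSO or checksum offload context setup as
 *  needed, applying the TSO sentinel-descriptor workaround, and
 *  recording the report-status slot used for later credits
 *  processing.
 *
 *********************************************************************/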
static int
em_isc_txd_encap(void *arg, if_pkt_info_t pi)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
	struct tx_ring *txr = &que->txr;
	bus_dma_segment_t *segs = pi->ipi_segs;
	int nsegs = pi->ipi_nsegs;
	int csum_flags = pi->ipi_csum_flags;
	int i, j, first, pidx_last;
	u32 txd_flags, txd_upper = 0, txd_lower = 0;

	struct e1000_tx_desc *ctxd = NULL;
	bool do_tso, tso_desc;
	qidx_t ntxd;

	txd_flags = pi->ipi_flags & IPI_TX_INTR ? E1000_TXD_CMD_RS : 0;
	i = first = pi->ipi_pidx;
	do_tso = (csum_flags & CSUM_TSO);
	tso_desc = false;
	ntxd = scctx->isc_ntxd[0];
	/*
	 * TSO hardware workaround: if this packet is not TSO, is only
	 * a single descriptor long, and follows a TSO burst, then we
	 * need to add a sentinel descriptor to prevent premature
	 * writeback.
	 */
	if ((!do_tso) && (txr->tx_tso == true)) {
		if (nsegs == 1)
			tso_desc = true;
		txr->tx_tso = false;
	}

	/* Do hardware assists */
	if (do_tso) {
		i = em_tso_setup(sc, pi, &txd_upper, &txd_lower);
		tso_desc = true;
	} else if (csum_flags & EM_CSUM_OFFLOAD) {
		i = em_transmit_checksum_setup(sc, pi, &txd_upper, &txd_lower);
	}

	if (pi->ipi_mflags & M_VLANTAG) {
		/* Set the vlan id. */
		txd_upper |= htole16(pi->ipi_vtag) << 16;
		/* Tell hardware to add tag */
		txd_lower |= htole32(E1000_TXD_CMD_VLE);
	}

	DPRINTF(iflib_get_dev(sc->ctx),
	    "encap: set up tx: nsegs=%d first=%d i=%d\n", nsegs, first, i);
	/* XXX sc->pcix_82544 -- lem_fill_descriptors */

	/* Set up our transmit descriptors */
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;
		uint32_t cmd;

		ctxd = &txr->tx_base[i];
		seg_addr = segs[j].ds_addr;
		seg_len = segs[j].ds_len;
		cmd = E1000_TXD_CMD_IFCS | sc->txd_cmd;

		/*
		 * TSO Workaround:
		 * If this is the last descriptor, we want to
		 * split it so we have a small final sentinel
		 */
		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
			seg_len -= TSO_WORKAROUND;
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(cmd | txd_lower | seg_len);
			ctxd->upper.data = htole32(txd_upper);

			if (++i == scctx->isc_ntxd[0])
				i = 0;

			/* Now make the sentinel */
			ctxd = &txr->tx_base[i];
			ctxd->buffer_addr = htole64(seg_addr + seg_len);
			ctxd->lower.data =
			    htole32(cmd | txd_lower | TSO_WORKAROUND);
			ctxd->upper.data = htole32(txd_upper);
			pidx_last = i;
			if (++i == scctx->isc_ntxd[0])
				i = 0;
			DPRINTF(iflib_get_dev(sc->ctx),
			    "TSO path pidx_last=%d i=%d ntxd[0]=%d\n",
			    pidx_last, i, scctx->isc_ntxd[0]);
		} else {
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(cmd | txd_lower | seg_len);
			ctxd->upper.data = htole32(txd_upper);
			pidx_last = i;
			if (++i == scctx->isc_ntxd[0])
				i = 0;
			DPRINTF(iflib_get_dev(sc->ctx),
			    "pidx_last=%d i=%d ntxd[0]=%d\n",
			    pidx_last, i, scctx->isc_ntxd[0]);
		}
	}

	/*
	 * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
	 */
	if (txd_flags && nsegs) {
		txr->tx_rsq[txr->tx_rs_pidx] = pidx_last;
		DPRINTF(iflib_get_dev(sc->ctx),
		    "setting to RS on %d rs_pidx %d first: %d\n",
		    pidx_last, txr->tx_rs_pidx, first);
		txr->tx_rs_pidx = (txr->tx_rs_pidx + 1) & (ntxd - 1);
		MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx);
	}
	ctxd->lower.data |= htole32(E1000_TXD_CMD_EOP | txd_flags);
	DPRINTF(iflib_get_dev(sc->ctx),
	    "tx_buffers[%d]->eop = %d ipi_new_pidx=%d\n", first, pidx_last, i);
	pi->ipi_new_pidx = i;

	return (0);
}
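
/*********************************************************************
 *
 *  iflib txd_flush handler: write the Transmit Descriptor Tail (TDT)
 *  register so the hardware picks up the newly queued descriptors.
 *
 *********************************************************************/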
static void
em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx)
{
	struct e1000_softc *sc = arg;
	struct em_tx_queue *que = &sc->tx_queues[txqid];
	struct tx_ring *txr = &que->txr;

	E1000_WRITE_REG(&sc->hw, E1000_TDT(txr->me), pidx);
}
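
/*********************************************************************
 *
 *  iflib txd_credits_update handler: walk the outstanding
 *  report-status entries and count the descriptors the hardware has
 *  completed (DD set).  With clear false, only report whether any
 *  completed work is pending; with clear true, consume the entries
 *  and return the number of descriptors reclaimed.
 *
 *********************************************************************/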
static int
em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[txqid];
	struct tx_ring *txr = &que->txr;

	qidx_t processed = 0;
	int updated;
	qidx_t cur, prev, ntxd, rs_cidx;
	int32_t delta;
	uint8_t status;

	rs_cidx = txr->tx_rs_cidx;
	if (rs_cidx == txr->tx_rs_pidx)
		return (0);
	cur = txr->tx_rsq[rs_cidx];
	MPASS(cur != QIDX_INVALID);
	status = txr->tx_base[cur].upper.fields.status;
	updated = !!(status & E1000_TXD_STAT_DD);

	if (!updated)
		return (0);

	/*
	 * If clear is false, just let the caller know that there
	 * are descriptors to reclaim.
	 */
	if (!clear)
		return (1);

	prev = txr->tx_cidx_processed;
	ntxd = scctx->isc_ntxd[0];
	do {
		MPASS(prev != cur);
		delta = (int32_t)cur - (int32_t)prev;
		if (delta < 0)
			delta += ntxd;
		MPASS(delta > 0);
		DPRINTF(iflib_get_dev(sc->ctx),
		    "%s: cidx_processed=%u cur=%u clear=%d delta=%d\n",
		    __FUNCTION__, prev, cur, clear, delta);

		processed += delta;
		prev = cur;
		rs_cidx = (rs_cidx + 1) & (ntxd - 1);
		if (rs_cidx == txr->tx_rs_pidx)
			break;
		cur = txr->tx_rsq[rs_cidx];
		MPASS(cur != QIDX_INVALID);
		status = txr->tx_base[cur].upper.fields.status;
	} while ((status & E1000_TXD_STAT_DD));

	txr->tx_rs_cidx = rs_cidx;
	txr->tx_cidx_processed = prev;
	return (processed);
}
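
/*********************************************************************
 *
 *  iflib rxd_refill handler for legacy (lem) descriptors: write the
 *  supplied buffer physical addresses into the receive ring and
 *  clear each descriptor's status byte.
 *
 *********************************************************************/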
static void
lem_isc_rxd_refill(void *arg, if_rxd_update_t iru)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[iru->iru_qsidx];
	struct rx_ring *rxr = &que->rxr;
	struct e1000_rx_desc *rxd;
	uint64_t *paddrs;
	uint32_t next_pidx, pidx;
	uint16_t count;
	int i;

	paddrs = iru->iru_paddrs;
	pidx = iru->iru_pidx;
	count = iru->iru_count;

	for (i = 0, next_pidx = pidx; i < count; i++) {
		rxd = (struct e1000_rx_desc *)&rxr->rx_base[next_pidx];
		rxd->buffer_addr = htole64(paddrs[i]);
		/* status bits must be cleared */
		rxd->status = 0;

		if (++next_pidx == scctx->isc_nrxd[0])
			next_pidx = 0;
	}
}
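
/*********************************************************************
 *
 *  iflib rxd_refill handler for extended (em) descriptors: write the
 *  supplied buffer physical addresses into the receive ring and
 *  clear the writeback status so stale DD bits are not seen.
 *
 *********************************************************************/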
static void
em_isc_rxd_refill(void *arg, if_rxd_update_t iru)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	uint16_t rxqid = iru->iru_qsidx;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	union e1000_rx_desc_extended *rxd;
	uint64_t *paddrs;
	uint32_t next_pidx, pidx;
	uint16_t count;
	int i;

	paddrs = iru->iru_paddrs;
	pidx = iru->iru_pidx;
	count = iru->iru_count;

	for (i = 0, next_pidx = pidx; i < count; i++) {
		rxd = &rxr->rx_base[next_pidx];
		rxd->read.buffer_addr = htole64(paddrs[i]);
		/* DD bits must be cleared */
		rxd->wb.upper.status_error = 0;

		if (++next_pidx == scctx->isc_nrxd[0])
			next_pidx = 0;
	}
}
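
/*********************************************************************
 *
 *  iflib rxd_flush handler: write the Receive Descriptor Tail (RDT)
 *  register to hand the refilled descriptors back to the hardware.
 *
 *********************************************************************/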
static void
em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx)
{
	struct e1000_softc *sc = arg;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;

	E1000_WRITE_REG(&sc->hw, E1000_RDT(rxr->me), pidx);
}
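
/*********************************************************************
 *
 *  iflib rxd_available handler for legacy descriptors: starting at
 *  idx, count the complete packets (descriptors with DD set, ending
 *  at EOP) that are ready, stopping at the first descriptor that is
 *  not done or once the budget is reached.
 *
 *********************************************************************/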
static int
lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	struct e1000_rx_desc *rxd;
	u32 staterr = 0;
	int cnt, i;

	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
		rxd = (struct e1000_rx_desc *)&rxr->rx_base[i];
		staterr = rxd->status;

		if ((staterr & E1000_RXD_STAT_DD) == 0)
			break;
		if (++i == scctx->isc_nrxd[0])
			i = 0;
		if (staterr & E1000_RXD_STAT_EOP)
			cnt++;
	}
	return (cnt);
}
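
/*********************************************************************
 *
 *  iflib rxd_available handler for extended descriptors: same walk
 *  as the legacy version above, but reading the DD and EOP bits
 *  from the writeback status_error field.
 *
 *********************************************************************/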
static int
em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	union e1000_rx_desc_extended *rxd;
	u32 staterr = 0;
	int cnt, i;

	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
		rxd = &rxr->rx_base[i];
		staterr = le32toh(rxd->wb.upper.status_error);

		if ((staterr & E1000_RXD_STAT_DD) == 0)
			break;
		if (++i == scctx->isc_nrxd[0])
			i = 0;
		if (staterr & E1000_RXD_STAT_EOP)
			cnt++;
	}
	return (cnt);
}
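
/*********************************************************************
 *
 *  iflib rxd_pkt_get handler for legacy descriptors: gather the
 *  fragments of one received packet (up to EOP), record their ring
 *  indices and lengths, clear each descriptor's status, and fill in
 *  checksum (where supported) and VLAN information.  Frames with
 *  hardware-reported errors are counted and EBADMSG is returned.
 *
 *********************************************************************/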
static int
lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[ri->iri_qsidx];
	struct rx_ring *rxr = &que->rxr;
	struct e1000_rx_desc *rxd;
	u16 len;
	u32 status, errors;
	bool eop;
	int i, cidx;

	status = errors = i = 0;
	cidx = ri->iri_cidx;

	do {
		rxd = (struct e1000_rx_desc *)&rxr->rx_base[cidx];
		status = rxd->status;
		errors = rxd->errors;

		/* Error checking: the descriptor must be done (DD set) */
		MPASS((status & E1000_RXD_STAT_DD) != 0);

		len = le16toh(rxd->length);
		ri->iri_len += len;

		eop = (status & E1000_RXD_STAT_EOP) != 0;

		/* Make sure bad packets are discarded */
		if (errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
			sc->dropped_pkts++;
			/* XXX fixup if common */
			return (EBADMSG);
		}

		ri->iri_frags[i].irf_flid = 0;
		ri->iri_frags[i].irf_idx = cidx;
		ri->iri_frags[i].irf_len = len;
		/* Zero out the receive descriptor's status. */
		rxd->status = 0;

		if (++cidx == scctx->isc_nrxd[0])
			cidx = 0;
		i++;
	} while (!eop);

	/* XXX add a faster way to look this up */
	if (sc->hw.mac.type >= e1000_82543)
		em_receive_checksum(status, errors, ri);

	if (status & E1000_RXD_STAT_VP) {
		ri->iri_vtag = le16toh(rxd->special);
		ri->iri_flags |= M_VLANTAG;
	}

	ri->iri_nfrags = i;

	return (0);
}
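
/*********************************************************************
 *
 *  iflib rxd_pkt_get handler for extended descriptors: as above,
 *  but using the writeback format, and additionally reporting the
 *  RSS hash and hash type for the packet.
 *
 *********************************************************************/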
static int
em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[ri->iri_qsidx];
	struct rx_ring *rxr = &que->rxr;
	union e1000_rx_desc_extended *rxd;

	u16 len;
	u32 pkt_info;
	u32 staterr = 0;
	bool eop;
	int i, cidx;

	i = 0;
	cidx = ri->iri_cidx;

	do {
		rxd = &rxr->rx_base[cidx];
		staterr = le32toh(rxd->wb.upper.status_error);
		pkt_info = le32toh(rxd->wb.lower.mrq);

		/* Error checking: the descriptor must be done (DD set) */
		MPASS((staterr & E1000_RXD_STAT_DD) != 0);

		len = le16toh(rxd->wb.upper.length);
		ri->iri_len += len;

		eop = (staterr & E1000_RXD_STAT_EOP) != 0;

		/* Make sure bad packets are discarded */
		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
			sc->dropped_pkts++;
			return (EBADMSG);
		}

		ri->iri_frags[i].irf_flid = 0;
		ri->iri_frags[i].irf_idx = cidx;
		ri->iri_frags[i].irf_len = len;
		/* Zero out the receive descriptor's status. */
		rxd->wb.upper.status_error &= htole32(~0xFF);

		if (++cidx == scctx->isc_nrxd[0])
			cidx = 0;
		i++;
	} while (!eop);

	if (scctx->isc_capenable & IFCAP_RXCSUM)
		em_receive_checksum(staterr, staterr >> 24, ri);

	if (staterr & E1000_RXD_STAT_VP) {
		ri->iri_vtag = le16toh(rxd->wb.upper.vlan);
		ri->iri_flags |= M_VLANTAG;
	}

	ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss);
	ri->iri_rsstype = em_determine_rsstype(pkt_info);

	ri->iri_nfrags = i;
	return (0);
}

/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that it
 *  does not spend time verifying it again.
 *
 *********************************************************************/
static void
em_receive_checksum(uint16_t status, uint8_t errors, if_rxd_info_t ri)
{
	if (__predict_false(status & E1000_RXD_STAT_IXSM))
		return;

	/* If there is a layer 3 or 4 error we are done */
	if (__predict_false(errors & (E1000_RXD_ERR_IPE | E1000_RXD_ERR_TCPE)))
		return;

	/* IP Checksum Good */
	if (status & E1000_RXD_STAT_IPCS)
		ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);

	/* Valid L4 (TCP/UDP) checksum */
	if (__predict_true(status &
	    (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))) {
		ri->iri_csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
		ri->iri_csum_data = htons(0xffff);
	}
}

/*********************************************************************
 *
 *  Parse the packet type to determine the appropriate hash
 *
 *********************************************************************/
static int
em_determine_rsstype(u32 pkt_info)
{
	switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
	case E1000_RXDADV_RSSTYPE_IPV4_TCP:
		return M_HASHTYPE_RSS_TCP_IPV4;
	case E1000_RXDADV_RSSTYPE_IPV4:
		return M_HASHTYPE_RSS_IPV4;
	case E1000_RXDADV_RSSTYPE_IPV6_TCP:
		return M_HASHTYPE_RSS_TCP_IPV6;
	case E1000_RXDADV_RSSTYPE_IPV6_EX:
		return M_HASHTYPE_RSS_IPV6_EX;
	case E1000_RXDADV_RSSTYPE_IPV6:
		return M_HASHTYPE_RSS_IPV6;
	case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
		return M_HASHTYPE_RSS_TCP_IPV6_EX;
	default:
		return M_HASHTYPE_OPAQUE;
	}
}