ethernet-rx.c revision 215974
1210311Sjmallett/*************************************************************************
2210311SjmallettCopyright (c) 2003-2007  Cavium Networks (support@cavium.com). All rights
3210311Sjmallettreserved.
4210311Sjmallett
5210311Sjmallett
6210311SjmallettRedistribution and use in source and binary forms, with or without
7210311Sjmallettmodification, are permitted provided that the following conditions are
8210311Sjmallettmet:
9210311Sjmallett
10210311Sjmallett    * Redistributions of source code must retain the above copyright
11210311Sjmallett      notice, this list of conditions and the following disclaimer.
12210311Sjmallett
13210311Sjmallett    * Redistributions in binary form must reproduce the above
14210311Sjmallett      copyright notice, this list of conditions and the following
15210311Sjmallett      disclaimer in the documentation and/or other materials provided
16210311Sjmallett      with the distribution.
17210311Sjmallett
18210311Sjmallett    * Neither the name of Cavium Networks nor the names of
19210311Sjmallett      its contributors may be used to endorse or promote products
20210311Sjmallett      derived from this software without specific prior written
21210311Sjmallett      permission.
22210311Sjmallett
23210311SjmallettThis Software, including technical data, may be subject to U.S. export  control laws, including the U.S. Export Administration Act and its  associated regulations, and may be subject to export or import  regulations in other countries.
24210311Sjmallett
25210311SjmallettTO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
26210311SjmallettAND WITH ALL FAULTS AND CAVIUM  NETWORKS MAKES NO PROMISES, REPRESENTATIONS OR WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE  RISK ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
27210311Sjmallett
28210311Sjmallett*************************************************************************/
29210311Sjmallett
30210311Sjmallett#include <sys/cdefs.h>
31210311Sjmallett__FBSDID("$FreeBSD: head/sys/mips/cavium/octe/ethernet-rx.c 215974 2010-11-28 05:57:24Z jmallett $");
32210311Sjmallett
33210311Sjmallett#include <sys/param.h>
34210311Sjmallett#include <sys/systm.h>
35210311Sjmallett#include <sys/bus.h>
36210311Sjmallett#include <sys/endian.h>
37210311Sjmallett#include <sys/kernel.h>
38210311Sjmallett#include <sys/mbuf.h>
39210311Sjmallett#include <sys/socket.h>
40213156Sjmallett#include <sys/proc.h>
41213156Sjmallett#include <sys/sched.h>
42210311Sjmallett#include <sys/smp.h>
43210311Sjmallett#include <sys/taskqueue.h>
44210311Sjmallett
45210311Sjmallett#include <net/ethernet.h>
46210311Sjmallett#include <net/if.h>
47210311Sjmallett
48210311Sjmallett#include "wrapper-cvmx-includes.h"
49210311Sjmallett#include "ethernet-headers.h"
50210311Sjmallett
51210311Sjmallettextern int pow_receive_group;
52210311Sjmallettextern struct ifnet *cvm_oct_device[];
53210311Sjmallett
54210311Sjmallettstatic struct task cvm_oct_task;
55210311Sjmallettstatic struct taskqueue *cvm_oct_taskq;
56210311Sjmallett
57210311Sjmallett/**
58210311Sjmallett * Interrupt handler. The interrupt occurs whenever the POW
59210311Sjmallett * transitions from 0->1 packets in our group.
60210311Sjmallett *
61210311Sjmallett * @param cpl
62210311Sjmallett * @param dev_id
63210311Sjmallett * @param regs
64210311Sjmallett * @return
65210311Sjmallett */
66210311Sjmallettint cvm_oct_do_interrupt(void *dev_id)
67210311Sjmallett{
68210311Sjmallett	/* Acknowledge the interrupt */
69210311Sjmallett	if (INTERRUPT_LIMIT)
70210311Sjmallett		cvmx_write_csr(CVMX_POW_WQ_INT, 1<<pow_receive_group);
71210311Sjmallett	else
72210311Sjmallett		cvmx_write_csr(CVMX_POW_WQ_INT, 0x10001<<pow_receive_group);
73210311Sjmallett	taskqueue_enqueue(cvm_oct_taskq, &cvm_oct_task);
74210311Sjmallett	return FILTER_HANDLED;
75210311Sjmallett}
76210311Sjmallett
77210311Sjmallett
/**
 * This is called on receive errors, and determines if the packet
 * can be dropped early-on in cvm_oct_tasklet_rx().
 *
 * If the packet can be salvaged (tolerable length error, or a
 * removable non-spec preamble), the work entry is fixed up in place
 * and the caller continues processing it.
 *
 * @param work Work queue entry pointing to the packet.
 * @return Non-zero if the packet can be dropped (the work entry has
 *         already been freed), zero otherwise.
 */
static inline int cvm_oct_check_rcv_error(cvmx_wqe_t *work)
{
	if ((work->word2.snoip.err_code == 10) && (work->len <= 64)) {
		/* Ignore length errors on min size packets. Some equipment
		   incorrectly pads packets to 64+4FCS instead of 60+4FCS.
		   Note these packets still get counted as frame errors. */
	} else
	if (USE_10MBPS_PREAMBLE_WORKAROUND && ((work->word2.snoip.err_code == 5) || (work->word2.snoip.err_code == 7))) {

		/* We received a packet with either an alignment error or a
		   FCS error. This may be signalling that we are running
		   10Mbps with GMXX_RXX_FRM_CTL[PRE_CHK] off. If this is the
		   case we need to parse the packet to determine if we can
		   remove a non spec preamble and generate a correct packet */
		int interface = cvmx_helper_get_interface_num(work->ipprt);
		int index = cvmx_helper_get_interface_index_num(work->ipprt);
		cvmx_gmxx_rxx_frm_ctl_t gmxx_rxx_frm_ctl;
		gmxx_rxx_frm_ctl.u64 = cvmx_read_csr(CVMX_GMXX_RXX_FRM_CTL(index, interface));
		if (gmxx_rxx_frm_ctl.s.pre_chk == 0) {
			/* Preamble checking is disabled in hardware, so scan
			   past any leading 0x55 preamble bytes by hand. */
			uint8_t *ptr = cvmx_phys_to_ptr(work->packet_ptr.s.addr);
			int i = 0;

			while (i < work->len-1) {
				if (*ptr != 0x55)
					break;
				ptr++;
				i++;
			}

			if (*ptr == 0xd5) {
				/* Byte-aligned SFD (0xd5): strip the i+1
				   preamble/SFD bytes and the 4-byte FCS. */
				/*
				DEBUGPRINT("Port %d received 0xd5 preamble\n", work->ipprt);
				*/
				work->packet_ptr.s.addr += i+1;
				work->len -= i+5;
			} else
			if ((*ptr & 0xf) == 0xd) {
				/* Nibble-aligned SFD: strip i preamble bytes
				   plus the FCS, then shift the remaining
				   payload by one nibble to realign it. */
				/*
				DEBUGPRINT("Port %d received 0x?d preamble\n", work->ipprt);
				*/
				work->packet_ptr.s.addr += i;
				work->len -= i+4;
				for (i = 0; i < work->len; i++) {
					*ptr = ((*ptr&0xf0)>>4) | ((*(ptr+1)&0xf)<<4);
					ptr++;
				}
			} else {
				DEBUGPRINT("Port %d unknown preamble, packet dropped\n", work->ipprt);
				/*
				cvmx_helper_dump_packet(work);
				*/
				cvm_oct_free_work(work);
				return 1;
			}
		}
	} else {
		/* Any other receive error is unrecoverable: drop. */
		DEBUGPRINT("Port %d receive error code %d, packet dropped\n", work->ipprt, work->word2.snoip.err_code);
		cvm_oct_free_work(work);
		return 1;
	}

	return 0;
}
149210311Sjmallett
/**
 * Tasklet function that is scheduled on a core when an interrupt occurs.
 *
 * Drains work-queue entries for our POW group, converts each received
 * packet into an mbuf (zero-copy when the packet sits in a single FPA
 * buffer), hands it to the network stack, and finally refills the FPA
 * packet-buffer pool for the buffers the stack now owns.
 *
 * @param context Unused taskqueue context.
 * @param pending Unused taskqueue pending count.
 */
void cvm_oct_tasklet_rx(void *context, int pending)
{
	int                 coreid;
	uint64_t            old_group_mask;
	uint64_t            old_scratch;
	int                 rx_count = 0;
	int                 number_to_free;
	int                 num_freed;
	int                 packet_not_copied;

	/* Pin to this CPU: the POW group mask below is a per-core register. */
	sched_pin();
	coreid = cvmx_get_core_num();

	/* Prefetch cvm_oct_device since we know we need it soon */
	CVMX_PREFETCH(cvm_oct_device, 0);

	if (USE_ASYNC_IOBDMA) {
		/* Save scratch in case userspace is using it */
		CVMX_SYNCIOBDMA;
		old_scratch = cvmx_scratch_read64(CVMX_SCR_SCRATCH);
	}

	/* Only allow work for our group (and preserve priorities) */
	old_group_mask = cvmx_read_csr(CVMX_POW_PP_GRP_MSKX(coreid));
	cvmx_write_csr(CVMX_POW_PP_GRP_MSKX(coreid),
		       (old_group_mask & ~0xFFFFull) | 1<<pow_receive_group);

	if (USE_ASYNC_IOBDMA)
		cvmx_pow_work_request_async(CVMX_SCR_SCRATCH, CVMX_POW_NO_WAIT);

	while (1) {
		struct mbuf *m = NULL;
		int mbuf_in_hw;
		cvmx_wqe_t *work;

		/* Fetch the next work entry: either the async request issued
		   on the previous iteration, or a synchronous request now. */
		if (USE_ASYNC_IOBDMA) {
			work = cvmx_pow_work_response_async(CVMX_SCR_SCRATCH);
		} else {
			if ((INTERRUPT_LIMIT == 0) || (rx_count < MAX_RX_PACKETS))
				work = cvmx_pow_work_request_sync(CVMX_POW_NO_WAIT);
			else
				work = NULL;
		}
		CVMX_PREFETCH(work, 0);
		if (work == NULL)
			break;

		/* Limit each core to processing MAX_RX_PACKETS packets without a break.
		   This way the RX can't starve the TX task. */
		if (USE_ASYNC_IOBDMA) {

			if ((INTERRUPT_LIMIT == 0) || (rx_count < MAX_RX_PACKETS))
				cvmx_pow_work_request_async_nocheck(CVMX_SCR_SCRATCH, CVMX_POW_NO_WAIT);
			else {
				/* Over the limit: poison scratch so the next
				   response reads as "no work" and release the
				   current POW tag. */
				cvmx_scratch_write64(CVMX_SCR_SCRATCH, 0x8000000000000000ull);
				cvmx_pow_tag_sw_null_nocheck();
			}
		}

		/* A single-buffer packet carries a back-pointer to its mbuf
		   just before the packet data (stored at buffer allocation). */
		mbuf_in_hw = work->word2.s.bufs == 1;
		if ((mbuf_in_hw)) {
			m = *(struct mbuf **)(cvm_oct_get_buffer_ptr(work->packet_ptr) - sizeof(void *));
			CVMX_PREFETCH(m, offsetof(struct mbuf, m_data));
			CVMX_PREFETCH(m, offsetof(struct mbuf, m_pkthdr));
		}
		CVMX_PREFETCH(cvm_oct_device[work->ipprt], 0);
		//CVMX_PREFETCH(m, 0);


		rx_count++;
		/* Immediately throw away all packets with receive errors */
		if ((work->word2.snoip.rcv_error)) {
			if (cvm_oct_check_rcv_error(work))
				continue;
		}

		/* We can only use the zero copy path if mbufs are in the FPA pool
		   and the packet fits in a single buffer */
		if ((mbuf_in_hw)) {
			CVMX_PREFETCH(m->m_data, 0);

			m->m_pkthdr.len = m->m_len = work->len;

			packet_not_copied = 1;

			/*
			 * Adjust the data pointer based on the offset
			 * of the packet within the buffer.
			 */
			m->m_data += (work->packet_ptr.s.back << 7) + (work->packet_ptr.s.addr & 0x7f);
		} else {

			/* We have to copy the packet. First allocate an
			   mbuf for it */
			MGETHDR(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				DEBUGPRINT("Port %d failed to allocate mbuf, packet dropped\n", work->ipprt);
				cvm_oct_free_work(work);
				continue;
			}

			/* Check if we've received a packet that was entirely
			   stored in the work entry. This is untested */
			if ((work->word2.s.bufs == 0)) {
				uint8_t *ptr = work->packet_data;

				if (cvmx_likely(!work->word2.s.not_IP)) {
					/* The beginning of the packet moves
					   for IP packets */
					if (work->word2.s.is_v6)
						ptr += 2;
					else
						ptr += 6;
				}
				panic("%s: not yet implemented; copy in small packet.", __func__);
				/* No packet buffers to free */
			} else {
				/* Multi-buffer packet: walk the buffer chain,
				   copying each segment into the mbuf. */
				int segments = work->word2.s.bufs;
				cvmx_buf_ptr_t segment_ptr = work->packet_ptr;
				int len = work->len;

				while (segments--) {
					cvmx_buf_ptr_t next_ptr = *(cvmx_buf_ptr_t *)cvmx_phys_to_ptr(segment_ptr.s.addr-8);
					/* Octeon Errata PKI-100: The segment
					   size is wrong. Until it is fixed,
					   calculate the segment size based on
					   the packet pool buffer size. When
					   it is fixed, the following line
					   should be replaced with this one:
					int segment_size = segment_ptr.s.size; */
					int segment_size = CVMX_FPA_PACKET_POOL_SIZE - (segment_ptr.s.addr - (((segment_ptr.s.addr >> 7) - segment_ptr.s.back) << 7));
					/* Don't copy more than what is left
					   in the packet */
					if (segment_size > len)
						segment_size = len;
					/* Copy the data into the packet */
					panic("%s: not yet implemented; copy in packet segments.", __func__);
#if 0
					memcpy(m_put(m, segment_size), cvmx_phys_to_ptr(segment_ptr.s.addr), segment_size);
#endif
					/* Reduce the amount of bytes left
					   to copy */
					len -= segment_size;
					segment_ptr = next_ptr;
				}
			}
			packet_not_copied = 0;
		}

		if (((work->ipprt < TOTAL_NUMBER_OF_PORTS) &&
		    cvm_oct_device[work->ipprt])) {
			struct ifnet *ifp = cvm_oct_device[work->ipprt];

			/* Only accept packets for devices
			   that are currently up */
			if ((ifp->if_flags & IFF_UP)) {
				m->m_pkthdr.rcvif = ifp;

				if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
					/* Trust the hardware checksum only for
					   clean IP packets with no L4 error. */
					if ((work->word2.s.not_IP || work->word2.s.IP_exc || work->word2.s.L4_error))
						m->m_pkthdr.csum_flags = 0; /* XXX */
					else {
						m->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
						m->m_pkthdr.csum_data = 0xffff;
					}
				} else {
					m->m_pkthdr.csum_flags = 0; /* XXX */
				}

				ifp->if_ipackets++;

				(*ifp->if_input)(ifp, m);
			} else {
				/* Drop any packet received for a device that isn't up */
				/*
				DEBUGPRINT("%s: Device not up, packet dropped\n",
					   if_name(ifp));
				*/
				m_freem(m);
			}
		} else {
			/* Drop any packet received for a device that
			   doesn't exist */
			/* NOTE(review): message says "Linux" — stale text from
			   the Linux driver this was ported from. */
			DEBUGPRINT("Port %d not controlled by Linux, packet dropped\n", work->ipprt);
			m_freem(m);
		}

		/* Check to see if the mbuf and work share
		   the same packet buffer */
		if ((packet_not_copied)) {
			/* This buffer needs to be replaced, increment
			the number of buffers we need to free by one */
			cvmx_fau_atomic_add32(
				FAU_NUM_PACKET_BUFFERS_TO_FREE, 1);

			cvmx_fpa_free(work, CVMX_FPA_WQE_POOL,
				      DONT_WRITEBACK(1));
		} else
			cvm_oct_free_work(work);
	}

	/* Restore the original POW group mask */
	cvmx_write_csr(CVMX_POW_PP_GRP_MSKX(coreid), old_group_mask);
	if (USE_ASYNC_IOBDMA) {
		/* Restore the scratch area */
		cvmx_scratch_write64(CVMX_SCR_SCRATCH, old_scratch);
	}

	/* Refill the packet buffer pool */
	number_to_free =
	  cvmx_fau_fetch_and_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE, 0);

	if (number_to_free > 0) {
		cvmx_fau_atomic_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE,
				      -number_to_free);
		num_freed =
			cvm_oct_mem_fill_fpa(CVMX_FPA_PACKET_POOL,
					     CVMX_FPA_PACKET_POOL_SIZE,
					     number_to_free);
		/* If fewer buffers were replenished than claimed, put the
		   shortfall back so a later pass retries it. */
		if (num_freed != number_to_free) {
			cvmx_fau_atomic_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE,
					      number_to_free - num_freed);
		}
	}
	sched_unpin();
}
381210311Sjmallett
382210311Sjmallett
383210311Sjmallett
384210311Sjmallettvoid cvm_oct_rx_initialize(void)
385210311Sjmallett{
386210311Sjmallett	TASK_INIT(&cvm_oct_task, 0, cvm_oct_tasklet_rx, NULL);
387210311Sjmallett
388210311Sjmallett	cvm_oct_taskq = taskqueue_create_fast("oct_rx", M_NOWAIT,
389210311Sjmallett					      taskqueue_thread_enqueue,
390210311Sjmallett					      &cvm_oct_taskq);
391210311Sjmallett	taskqueue_start_threads(&cvm_oct_taskq, min(mp_ncpus, MAXCPU),
392210311Sjmallett				PI_NET, "octe taskq");
393210311Sjmallett}
394210311Sjmallett
/**
 * Tear down the receive path.  Draining outstanding work and freeing
 * the RX taskqueue is not implemented, so the driver cannot currently
 * be detached; panic if this is ever reached.
 */
void cvm_oct_rx_shutdown(void)
{
	panic("%s: not yet implemented.", __func__);
}
399210311Sjmallett
400