ethernet-rx.c revision 232812
1/*************************************************************************
2Copyright (c) 2003-2007  Cavium Networks (support@cavium.com). All rights
3reserved.
4
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are
8met:
9
10    * Redistributions of source code must retain the above copyright
11      notice, this list of conditions and the following disclaimer.
12
13    * Redistributions in binary form must reproduce the above
14      copyright notice, this list of conditions and the following
15      disclaimer in the documentation and/or other materials provided
16      with the distribution.
17
18    * Neither the name of Cavium Networks nor the names of
19      its contributors may be used to endorse or promote products
20      derived from this software without specific prior written
21      permission.
22
23This Software, including technical data, may be subject to U.S. export  control laws, including the U.S. Export Administration Act and its  associated regulations, and may be subject to export or import  regulations in other countries.
24
25TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
26AND WITH ALL FAULTS AND CAVIUM  NETWORKS MAKES NO PROMISES, REPRESENTATIONS OR WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE  RISK ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
27
28*************************************************************************/
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sys/mips/cavium/octe/ethernet-rx.c 232812 2012-03-11 06:17:49Z jmallett $");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/bus.h>
36#include <sys/endian.h>
37#include <sys/kernel.h>
38#include <sys/mbuf.h>
39#include <sys/socket.h>
40#include <sys/proc.h>
41#include <sys/sched.h>
42#include <sys/smp.h>
43#include <sys/taskqueue.h>
44
45#include <net/ethernet.h>
46#include <net/if.h>
47
48#include "wrapper-cvmx-includes.h"
49#include "ethernet-headers.h"
50
51extern int pow_receive_group;	/* POW group number; used in this file as the bit position for the work-queue interrupt ack and the per-core group mask */
52extern struct ifnet *cvm_oct_device[];	/* interface table indexed by the input port (ipprt) of a work queue entry; entries are checked for NULL before use */
53
54static struct task cvm_oct_task;	/* receive task; bound to cvm_oct_tasklet_rx() in cvm_oct_rx_initialize() */
55static struct taskqueue *cvm_oct_taskq;	/* taskqueue the receive task is enqueued on; created in cvm_oct_rx_initialize() */
56
57static int cvm_oct_rx_active;	/* 0/1 flag: moved 0->1 by the interrupt handler when it enqueues the task, 1->0 by the task when it runs out of work */
58
59/**
60 * Interrupt handler. The interrupt occurs whenever the POW
61 * transitions from 0->1 packets in our group.
62 *
63 * @param cpl
64 * @param dev_id
65 * @param regs
66 * @return
67 */
68int cvm_oct_do_interrupt(void *dev_id)
69{
70	/* Acknowledge the interrupt */
71	if (INTERRUPT_LIMIT)
72		cvmx_write_csr(CVMX_POW_WQ_INT, 1<<pow_receive_group);
73	else
74		cvmx_write_csr(CVMX_POW_WQ_INT, 0x10001<<pow_receive_group);
75
76	/*
77	 * Schedule task if there isn't one running.
78	 */
79	if (atomic_cmpset_int(&cvm_oct_rx_active, 0, 1))
80		taskqueue_enqueue(cvm_oct_taskq, &cvm_oct_task);
81
82	return FILTER_HANDLED;
83}
84
85
86/**
87 * This is called on receive errors, and determines if the packet
88 * can be dropped early-on in cvm_oct_tasklet_rx().
89 *
90 * @param work Work queue entry pointing to the packet.
91 * @return Non-zero if the packet can be dropped, zero otherwise.
92 */
93static inline int cvm_oct_check_rcv_error(cvmx_wqe_t *work)
94{
95	if ((work->word2.snoip.err_code == 10) && (work->word1.s.len <= 64)) {
96		/* Ignore length errors on min size packets. Some equipment
97		   incorrectly pads packets to 64+4FCS instead of 60+4FCS.
98		   Note these packets still get counted as frame errors. */
99	} else
100	if (USE_10MBPS_PREAMBLE_WORKAROUND && ((work->word2.snoip.err_code == 5) || (work->word2.snoip.err_code == 7))) {
101
102		/* We received a packet with either an alignment error or a
103		   FCS error. This may be signalling that we are running
104		   10Mbps with GMXX_RXX_FRM_CTL[PRE_CHK} off. If this is the
105		   case we need to parse the packet to determine if we can
106		   remove a non spec preamble and generate a correct packet */
107		int interface = cvmx_helper_get_interface_num(work->word1.cn38xx.ipprt);
108		int index = cvmx_helper_get_interface_index_num(work->word1.cn38xx.ipprt);
109		cvmx_gmxx_rxx_frm_ctl_t gmxx_rxx_frm_ctl;
110		gmxx_rxx_frm_ctl.u64 = cvmx_read_csr(CVMX_GMXX_RXX_FRM_CTL(index, interface));
111		if (gmxx_rxx_frm_ctl.s.pre_chk == 0) {
112
113			uint8_t *ptr = cvmx_phys_to_ptr(work->packet_ptr.s.addr);
114			int i = 0;
115
116			while (i < work->word1.s.len-1) {
117				if (*ptr != 0x55)
118					break;
119				ptr++;
120				i++;
121			}
122
123			if (*ptr == 0xd5) {
124				/*
125				DEBUGPRINT("Port %d received 0xd5 preamble\n", work->word1.cn38xx.ipprt);
126				*/
127				work->packet_ptr.s.addr += i+1;
128				work->word1.s.len -= i+5;
129			} else
130			if ((*ptr & 0xf) == 0xd) {
131				/*
132				DEBUGPRINT("Port %d received 0x?d preamble\n", work->word1.cn38xx.ipprt);
133				*/
134				work->packet_ptr.s.addr += i;
135				work->word1.s.len -= i+4;
136				for (i = 0; i < work->word1.s.len; i++) {
137					*ptr = ((*ptr&0xf0)>>4) | ((*(ptr+1)&0xf)<<4);
138					ptr++;
139				}
140			} else {
141				DEBUGPRINT("Port %d unknown preamble, packet dropped\n", work->word1.cn38xx.ipprt);
142				/*
143				cvmx_helper_dump_packet(work);
144				*/
145				cvm_oct_free_work(work);
146				return 1;
147			}
148		}
149	} else {
150		DEBUGPRINT("Port %d receive error code %d, packet dropped\n", work->word1.cn38xx.ipprt, work->word2.snoip.err_code);
151		cvm_oct_free_work(work);
152		return 1;
153	}
154
155	return 0;
156}
157
158/**
159 * Tasklet function that is scheduled on a core when an interrupt occurs.
160 *
161 * @param unused
162 */
163void cvm_oct_tasklet_rx(void *context, int pending)
164{
165	int                 coreid;
166	uint64_t            old_group_mask;
167	int                 rx_count = 0;
168	int                 number_to_free;
169	int                 num_freed;
170	int                 packet_not_copied;
171
172	sched_pin();
173	coreid = cvmx_get_core_num();
174
175	/* Prefetch cvm_oct_device since we know we need it soon */
176	CVMX_PREFETCH(cvm_oct_device, 0);
177
178	/* Only allow work for our group (and preserve priorities) */
179	old_group_mask = cvmx_read_csr(CVMX_POW_PP_GRP_MSKX(coreid));
180	cvmx_write_csr(CVMX_POW_PP_GRP_MSKX(coreid),
181		       (old_group_mask & ~0xFFFFull) | 1<<pow_receive_group);
182
183	while (1) {
184		struct mbuf *m = NULL;
185		int mbuf_in_hw;
186		cvmx_wqe_t *work;
187
188		if ((INTERRUPT_LIMIT == 0) || (rx_count < MAX_RX_PACKETS))
189			work = cvmx_pow_work_request_sync(CVMX_POW_NO_WAIT);
190		else
191			work = NULL;
192		CVMX_PREFETCH(work, 0);
193		if (work == NULL)
194			break;
195
196		mbuf_in_hw = work->word2.s.bufs == 1;
197		if ((mbuf_in_hw)) {
198			m = *(struct mbuf **)(cvm_oct_get_buffer_ptr(work->packet_ptr) - sizeof(void *));
199			CVMX_PREFETCH(m, offsetof(struct mbuf, m_data));
200			CVMX_PREFETCH(m, offsetof(struct mbuf, m_pkthdr));
201		}
202		CVMX_PREFETCH(cvm_oct_device[work->word1.cn38xx.ipprt], 0);
203		//CVMX_PREFETCH(m, 0);
204
205
206		rx_count++;
207		/* Immediately throw away all packets with receive errors */
208		if ((work->word2.snoip.rcv_error)) {
209			if (cvm_oct_check_rcv_error(work))
210				continue;
211		}
212
213		/* We can only use the zero copy path if mbufs are in the FPA pool
214		   and the packet fits in a single buffer */
215		if ((mbuf_in_hw)) {
216			CVMX_PREFETCH(m->m_data, 0);
217
218			m->m_pkthdr.len = m->m_len = work->word1.s.len;
219
220			packet_not_copied = 1;
221
222			/*
223			 * Adjust the data pointer based on the offset
224			 * of the packet within the buffer.
225			 */
226			m->m_data += (work->packet_ptr.s.back << 7) + (work->packet_ptr.s.addr & 0x7f);
227		} else {
228
229			/* We have to copy the packet. First allocate an
230			   mbuf for it */
231			MGETHDR(m, M_DONTWAIT, MT_DATA);
232			if (m == NULL) {
233				DEBUGPRINT("Port %d failed to allocate mbuf, packet dropped\n", work->word1.cn38xx.ipprt);
234				cvm_oct_free_work(work);
235				continue;
236			}
237
238			/* Check if we've received a packet that was entirely
239			   stored in the work entry. This is untested */
240			if ((work->word2.s.bufs == 0)) {
241				uint8_t *ptr = work->packet_data;
242
243				if (cvmx_likely(!work->word2.s.not_IP)) {
244					/* The beginning of the packet moves
245					   for IP packets */
246					if (work->word2.s.is_v6)
247						ptr += 2;
248					else
249						ptr += 6;
250				}
251				panic("%s: not yet implemented; copy in small packet.", __func__);
252				/* No packet buffers to free */
253			} else {
254				int segments = work->word2.s.bufs;
255				cvmx_buf_ptr_t segment_ptr = work->packet_ptr;
256				int len = work->word1.s.len;
257
258				while (segments--) {
259					cvmx_buf_ptr_t next_ptr = *(cvmx_buf_ptr_t *)cvmx_phys_to_ptr(segment_ptr.s.addr-8);
260					/* Octeon Errata PKI-100: The segment
261					   size is wrong. Until it is fixed,
262					   calculate the segment size based on
263					   the packet pool buffer size. When
264					   it is fixed, the following line
265					   should be replaced with this one:
266					int segment_size = segment_ptr.s.size; */
267					int segment_size = CVMX_FPA_PACKET_POOL_SIZE - (segment_ptr.s.addr - (((segment_ptr.s.addr >> 7) - segment_ptr.s.back) << 7));
268					/* Don't copy more than what is left
269					   in the packet */
270					if (segment_size > len)
271						segment_size = len;
272					/* Copy the data into the packet */
273					panic("%s: not yet implemented; copy in packet segments.", __func__);
274#if 0
275					memcpy(m_put(m, segment_size), cvmx_phys_to_ptr(segment_ptr.s.addr), segment_size);
276#endif
277					/* Reduce the amount of bytes left
278					   to copy */
279					len -= segment_size;
280					segment_ptr = next_ptr;
281				}
282			}
283			packet_not_copied = 0;
284		}
285
286		if (((work->word1.cn38xx.ipprt < TOTAL_NUMBER_OF_PORTS) &&
287		    cvm_oct_device[work->word1.cn38xx.ipprt])) {
288			struct ifnet *ifp = cvm_oct_device[work->word1.cn38xx.ipprt];
289
290			/* Only accept packets for devices
291			   that are currently up */
292			if ((ifp->if_flags & IFF_UP)) {
293				m->m_pkthdr.rcvif = ifp;
294
295				if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
296					if ((work->word2.s.not_IP || work->word2.s.IP_exc || work->word2.s.L4_error))
297						m->m_pkthdr.csum_flags = 0; /* XXX */
298					else {
299						m->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
300						m->m_pkthdr.csum_data = 0xffff;
301					}
302				} else {
303					m->m_pkthdr.csum_flags = 0; /* XXX */
304				}
305
306				ifp->if_ipackets++;
307
308				(*ifp->if_input)(ifp, m);
309			} else {
310				/* Drop any packet received for a device that isn't up */
311				/*
312				DEBUGPRINT("%s: Device not up, packet dropped\n",
313					   if_name(ifp));
314				*/
315				m_freem(m);
316			}
317		} else {
318			/* Drop any packet received for a device that
319			   doesn't exist */
320			DEBUGPRINT("Port %d not controlled by Linux, packet dropped\n", work->word1.cn38xx.ipprt);
321			m_freem(m);
322		}
323
324		/* Check to see if the mbuf and work share
325		   the same packet buffer */
326		if ((packet_not_copied)) {
327			/* This buffer needs to be replaced, increment
328			the number of buffers we need to free by one */
329			cvmx_fau_atomic_add32(
330				FAU_NUM_PACKET_BUFFERS_TO_FREE, 1);
331
332			cvmx_fpa_free(work, CVMX_FPA_WQE_POOL,
333				      DONT_WRITEBACK(1));
334		} else
335			cvm_oct_free_work(work);
336	}
337
338	/*
339	 * If we hit our limit, schedule another task while we clean up.
340	 */
341	if (INTERRUPT_LIMIT != 0 && rx_count == MAX_RX_PACKETS) {
342		taskqueue_enqueue(cvm_oct_taskq, &cvm_oct_task);
343	} else {
344		/*
345		 * No more packets, all done.
346		 */
347		if (!atomic_cmpset_int(&cvm_oct_rx_active, 1, 0))
348			panic("%s: inconsistent rx active state.", __func__);
349	}
350
351	/* Restore the original POW group mask */
352	cvmx_write_csr(CVMX_POW_PP_GRP_MSKX(coreid), old_group_mask);
353
354	/* Refill the packet buffer pool */
355	number_to_free =
356	  cvmx_fau_fetch_and_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE, 0);
357
358	if (number_to_free > 0) {
359		cvmx_fau_atomic_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE,
360				      -number_to_free);
361		num_freed =
362			cvm_oct_mem_fill_fpa(CVMX_FPA_PACKET_POOL,
363					     CVMX_FPA_PACKET_POOL_SIZE,
364					     number_to_free);
365		if (num_freed != number_to_free) {
366			cvmx_fau_atomic_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE,
367					      number_to_free - num_freed);
368		}
369	}
370	sched_unpin();
371}
372
373
374
375void cvm_oct_rx_initialize(void)
376{
377	TASK_INIT(&cvm_oct_task, 0, cvm_oct_tasklet_rx, NULL);
378
379	cvm_oct_taskq = taskqueue_create_fast("oct_rx", M_NOWAIT,
380					      taskqueue_thread_enqueue,
381					      &cvm_oct_taskq);
382	taskqueue_start_threads(&cvm_oct_taskq, min(mp_ncpus, MAXCPU),
383				PI_NET, "octe taskq");
384}
385
/**
 * Tear down the receive path.
 *
 * Teardown has not been written yet: calling this function panics.
 */
void cvm_oct_rx_shutdown(void)
{
	panic("%s: not yet implemented.", __func__);
}
390
391