/*************************************************************************
Copyright (c) 2003-2007  Cavium Networks (support@cavium.com). All rights
reserved.


Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above
      copyright notice, this list of conditions and the following
      disclaimer in the documentation and/or other materials provided
      with the distribution.

    * Neither the name of Cavium Networks nor the names of
      its contributors may be used to endorse or promote products
      derived from this software without specific prior written
      permission.

This Software, including technical data, may be subject to U.S. export
control laws, including the U.S. Export Administration Act and its
associated regulations, and may be subject to export or import
regulations in other countries.

TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
AND WITH ALL FAULTS AND CAVIUM NETWORKS MAKES NO PROMISES, REPRESENTATIONS
OR WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH
RESPECT TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.

*************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/mips/cavium/octe/ethernet-tx.c 213156 2010-09-25 04:39:12Z jmallett $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/socket.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>

#include "wrapper-cvmx-includes.h"
#include "ethernet-headers.h"

/* You can define GET_MBUF_QOS() to override how the mbuf output function
   determines which output queue is used. The default implementation
   always uses the base queue for the port. If, for example, you wanted
   to use the m->priority field, define GET_MBUF_QOS as:
   #define GET_MBUF_QOS(m) ((m)->priority) */
#ifndef GET_MBUF_QOS
    #define GET_MBUF_QOS(m) 0
#endif

extern int pow_send_group;


/**
 * Packet transmit
 *
 * @param m      Packet to send
 * @param ifp    Interface to send it on
 * @return Zero on success, one if the packet was dropped
 */
int cvm_oct_xmit(struct mbuf *m, struct ifnet *ifp)
{
	cvmx_pko_command_word0_t    pko_command;
	cvmx_buf_ptr_t              hw_buffer;
	uint64_t                    old_scratch;
	uint64_t                    old_scratch2;
	int                         dropped;
	int                         qos;
	cvm_oct_private_t          *priv = (cvm_oct_private_t *)ifp->if_softc;
	int32_t in_use;
	int32_t buffers_to_free;
	cvmx_wqe_t *work;

	/* Prefetch the private data structure.
	   It is larger than one cache line. */
	CVMX_PREFETCH(priv, 0);

	/* Start off assuming no drop */
	dropped = 0;

	/* The check on CVMX_PKO_QUEUES_PER_PORT_* is designed to completely
	   remove "qos" in the event neither interface supports multiple queues
	   per port */
	if ((CVMX_PKO_QUEUES_PER_PORT_INTERFACE0 > 1) ||
	    (CVMX_PKO_QUEUES_PER_PORT_INTERFACE1 > 1)) {
		qos = GET_MBUF_QOS(m);
		if (qos <= 0)
			qos = 0;
		else if (qos >= cvmx_pko_get_num_queues(priv->port))
			qos = 0;
	} else
		qos = 0;

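	/*
	 * With IOBDMA support the two FAU fetch-and-add operations below are
	 * issued asynchronously; their results land in the CVMX scratchpad
	 * and are only read back after the CVMX_SYNCIOBDMA further down, so
	 * the FAU accesses do not stall the transmit path here.
	 */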
	if (USE_ASYNC_IOBDMA) {
		/* Save scratch in case userspace is using it */
		CVMX_SYNCIOBDMA;
		old_scratch = cvmx_scratch_read64(CVMX_SCR_SCRATCH);
		old_scratch2 = cvmx_scratch_read64(CVMX_SCR_SCRATCH+8);

		/* Assume we're going to be able to send this packet. Fetch and
		   increment the number of pending packets for output. */
		cvmx_fau_async_fetch_and_add32(CVMX_SCR_SCRATCH+8, FAU_NUM_PACKET_BUFFERS_TO_FREE, 0);
		cvmx_fau_async_fetch_and_add32(CVMX_SCR_SCRATCH, priv->fau+qos*4, 1);
	}

	/* The CN3XXX series of parts has an errata (GMX-401) which causes the
	   GMX block to hang if a collision occurs towards the end of a
	   <68 byte packet. As a workaround for this, we pad packets to be
	   68 bytes whenever we are in half duplex mode. We don't handle
	   the case of having a small packet but no room to add the padding.
	   The kernel should always give us at least a cache line. */
	if (__predict_false(m->m_pkthdr.len < 64) && OCTEON_IS_MODEL(OCTEON_CN3XXX)) {
		cvmx_gmxx_prtx_cfg_t gmx_prt_cfg;
		int interface = INTERFACE(priv->port);
		int index = INDEX(priv->port);

		if (interface < 2) {
			/* We only need to pad packets in half duplex mode */
			gmx_prt_cfg.u64 = cvmx_read_csr(CVMX_GMXX_PRTX_CFG(index, interface));
			if (gmx_prt_cfg.s.duplex == 0) {
				static uint8_t pad[64];

				if (!m_append(m, sizeof pad - m->m_pkthdr.len, pad))
					printf("%s: unable to pad small packet.", __func__);
			}
		}
	}

	/*
	 * If the packet is not fragmented.
	 */
	if (m->m_pkthdr.len == m->m_len) {
		/* Build the PKO buffer pointer */
		hw_buffer.u64 = 0;
		hw_buffer.s.addr = cvmx_ptr_to_phys(m->m_data);
		hw_buffer.s.pool = 0;
		hw_buffer.s.size = m->m_len;

		/* Build the PKO command */
		pko_command.u64 = 0;
		pko_command.s.segs = 1;

		work = NULL;
	} else {
		struct mbuf *n;
		unsigned segs;
		uint64_t *gp;

		/*
		 * The packet is fragmented, we need to send a list of segments
		 * in memory we borrow from the WQE pool.
		 */
		work = cvmx_fpa_alloc(CVMX_FPA_WQE_POOL);
		gp = (uint64_t *)work;

		segs = 0;
		for (n = m; n != NULL; n = n->m_next) {
			if (segs == CVMX_FPA_WQE_POOL_SIZE / sizeof (uint64_t))
				panic("%s: too many segments in packet; call m_collapse().", __func__);

			/* Build the PKO buffer pointer */
			hw_buffer.u64 = 0;
			hw_buffer.s.addr = cvmx_ptr_to_phys(n->m_data);
			hw_buffer.s.pool = 0;
			hw_buffer.s.size = n->m_len;

			*gp++ = hw_buffer.u64;
			segs++;
		}

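		/*
		 * For a gather segment the buffer pointer's size field holds
		 * the number of entries in the list rather than a byte count,
		 * and addr points at the list built above in the borrowed
		 * WQE buffer.
		 */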
		/* Build the PKO buffer gather list pointer */
		hw_buffer.u64 = 0;
		hw_buffer.s.addr = cvmx_ptr_to_phys(work);
		hw_buffer.s.pool = CVMX_FPA_WQE_POOL;
		hw_buffer.s.size = segs;

		/* Build the PKO command */
		pko_command.u64 = 0;
		pko_command.s.segs = segs;
		pko_command.s.gather = 1;
	}

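	/*
	 * dontfree keeps the PKO from returning the mbuf data to an FPA pool;
	 * instead reg0/size0/subone0 tell the hardware to subtract one from
	 * the 32-bit FAU counter at priv->fau + qos*4 once the packet has
	 * gone out, which is how we track how many mbufs the hardware still
	 * holds.
	 */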
	/* Finish building the PKO command */
	pko_command.s.n2 = 1; /* Don't pollute L2 with the outgoing packet */
	pko_command.s.dontfree = 1;
	pko_command.s.reg0 = priv->fau+qos*4;
	pko_command.s.total_bytes = m->m_pkthdr.len;
	pko_command.s.size0 = CVMX_FAU_OP_SIZE_32;
	pko_command.s.subone0 = 1;

	/* Check if we can use the hardware checksumming */
	if (USE_HW_TCPUDP_CHECKSUM &&
	    (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)) != 0) {
		/* Use the hardware checksum engine; ipoffp1 is the IP header
		   offset plus one (zero disables the offload), so the L3
		   header starts right after the Ethernet header. */
		pko_command.s.ipoffp1 = ETHER_HDR_LEN + 1;
	}

	IF_LOCK(&priv->tx_free_queue[qos]);
	if (USE_ASYNC_IOBDMA) {
		/* Get the number of mbufs in use by the hardware */
		CVMX_SYNCIOBDMA;
		in_use = cvmx_scratch_read64(CVMX_SCR_SCRATCH);
		buffers_to_free = cvmx_scratch_read64(CVMX_SCR_SCRATCH+8);
	} else {
		/* Get the number of mbufs in use by the hardware */
		in_use = cvmx_fau_fetch_and_add32(priv->fau+qos*4, 1);
		buffers_to_free = cvmx_fau_fetch_and_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE, 0);
	}

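	/*
	 * PKO sends are two-phase: prepare must be followed by finish with
	 * the same port, queue and locking arguments; finish queues the
	 * command and buffer pointer words to the output command queue.
	 */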
	cvmx_pko_send_packet_prepare(priv->port, priv->queue + qos, CVMX_PKO_LOCK_CMD_QUEUE);

	/* Drop this packet if we have too many already queued to the HW */
	if (_IF_QFULL(&priv->tx_free_queue[qos])) {
		dropped = 1;
	}
	/* Send the packet to the output queue */
	else if (__predict_false(cvmx_pko_send_packet_finish(priv->port, priv->queue + qos, pko_command, hw_buffer, CVMX_PKO_LOCK_CMD_QUEUE))) {
		DEBUGPRINT("%s: Failed to send the packet\n", if_name(ifp));
		dropped = 1;
	}

	if (USE_ASYNC_IOBDMA) {
		/* Restore the scratch area */
		cvmx_scratch_write64(CVMX_SCR_SCRATCH, old_scratch);
		cvmx_scratch_write64(CVMX_SCR_SCRATCH+8, old_scratch2);
	}

	if (__predict_false(dropped)) {
		m_freem(m);
		cvmx_fau_atomic_add32(priv->fau+qos*4, -1);
		ifp->if_oerrors++;
	} else {
		/* Put this packet on the queue to be freed later */
		_IF_ENQUEUE(&priv->tx_free_queue[qos], m);

		/* Pass it to any BPF listeners.  */
		ETHER_BPF_MTAP(ifp, m);
	}
	if (work != NULL)
		cvmx_fpa_free(work, CVMX_FPA_WQE_POOL, DONT_WRITEBACK(1));

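	/*
	 * The PKO decrements the FAU counter as each packet goes out, so any
	 * mbufs on the free queue beyond the in_use snapshot are no longer
	 * referenced by the hardware.
	 */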
	/* Free mbufs not in use by the hardware */
	if (_IF_QLEN(&priv->tx_free_queue[qos]) > in_use) {
		while (_IF_QLEN(&priv->tx_free_queue[qos]) > in_use) {
			_IF_DEQUEUE(&priv->tx_free_queue[qos], m);
			m_freem(m);
		}
	}
	IF_UNLOCK(&priv->tx_free_queue[qos]);

	return dropped;
}


/**
 * Packet transmit to the POW
 *
 * @param m      Packet to send
 * @param ifp    Interface to send it on
 * @return Always returns zero
 */
int cvm_oct_xmit_pow(struct mbuf *m, struct ifnet *ifp)
{
	cvm_oct_private_t  *priv = (cvm_oct_private_t *)ifp->if_softc;
	char               *packet_buffer;
	char               *copy_location;

	/* Get a work queue entry */
	cvmx_wqe_t *work = cvmx_fpa_alloc(CVMX_FPA_WQE_POOL);
	if (__predict_false(work == NULL)) {
		DEBUGPRINT("%s: Failed to allocate a work queue entry\n", if_name(ifp));
		ifp->if_oerrors++;
		m_freem(m);
		return 0;
	}

	/* Get a packet buffer */
	packet_buffer = cvmx_fpa_alloc(CVMX_FPA_PACKET_POOL);
	if (__predict_false(packet_buffer == NULL)) {
		DEBUGPRINT("%s: Failed to allocate a packet buffer\n",
			   if_name(ifp));
		cvmx_fpa_free(work, CVMX_FPA_WQE_POOL, DONT_WRITEBACK(1));
		ifp->if_oerrors++;
		m_freem(m);
		return 0;
	}

	/* Calculate where we need to copy the data to. We need to leave 8 bytes
	   for a next pointer (unused). We also need to include any configured
	   skip. Then we need to align the IP packet src and dest into the same
	   64bit word. The below calculation may add a little extra, but that
	   doesn't hurt. */
	copy_location = packet_buffer + sizeof(uint64_t);
	copy_location += ((CVMX_HELPER_FIRST_MBUFF_SKIP+7)&0xfff8) + 6;

	/* We have to copy the packet since whoever processes this packet
	   will free it to a hardware pool. We can't use the trick of
	   counting outstanding packets like in cvm_oct_xmit. */
	m_copydata(m, 0, m->m_pkthdr.len, copy_location);

	/* Fill in some of the work queue fields. We may need to add more
	   if the software at the other end needs them. */
#if 0
	work->hw_chksum     = m->csum;
#endif
	work->len           = m->m_pkthdr.len;
	work->ipprt         = priv->port;
	work->qos           = priv->port & 0x7;
	work->grp           = pow_send_group;
	work->tag_type      = CVMX_HELPER_INPUT_TAG_TYPE;
	work->tag           = pow_send_group; /* FIXME */
	work->word2.u64     = 0;    /* Default to zero; fields that stay zero are only set in the commented-out code below */
	work->word2.s.bufs  = 1;
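	/* The back field is the distance from the packet data back to the
	   start of the FPA buffer, in 128-byte cache lines, so the consumer
	   can return the buffer to the pool correctly. */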
	work->packet_ptr.u64 = 0;
	work->packet_ptr.s.addr = cvmx_ptr_to_phys(copy_location);
	work->packet_ptr.s.pool = CVMX_FPA_PACKET_POOL;
	work->packet_ptr.s.size = CVMX_FPA_PACKET_POOL_SIZE;
	work->packet_ptr.s.back = (copy_location - packet_buffer)>>7;

	panic("%s: POW transmit not quite implemented yet.", __func__);
#if 0
	if (m->protocol == htons(ETH_P_IP)) {
		work->word2.s.ip_offset     = 14;
		#if 0
		work->word2.s.vlan_valid  = 0; /* FIXME */
		work->word2.s.vlan_cfi    = 0; /* FIXME */
		work->word2.s.vlan_id     = 0; /* FIXME */
		work->word2.s.dec_ipcomp  = 0; /* FIXME */
		#endif
		work->word2.s.tcp_or_udp    = (ip_hdr(m)->protocol == IP_PROTOCOL_TCP) || (ip_hdr(m)->protocol == IP_PROTOCOL_UDP);
		#if 0
		work->word2.s.dec_ipsec   = 0; /* FIXME */
		work->word2.s.is_v6       = 0; /* We only support IPv4 right now */
		work->word2.s.software    = 0; /* Hardware would set to zero */
		work->word2.s.L4_error    = 0; /* No error, packet is internal */
		#endif
		work->word2.s.is_frag       = !((ip_hdr(m)->frag_off == 0) || (ip_hdr(m)->frag_off == 1<<14));
		#if 0
		work->word2.s.IP_exc      = 0;  /* Assume Linux is sending a good packet */
		#endif
		work->word2.s.is_bcast      = (m->pkt_type == PACKET_BROADCAST);
		work->word2.s.is_mcast      = (m->pkt_type == PACKET_MULTICAST);
		#if 0
		work->word2.s.not_IP      = 0; /* This is an IP packet */
		work->word2.s.rcv_error   = 0; /* No error, packet is internal */
		work->word2.s.err_code    = 0; /* No error, packet is internal */
		#endif

		/* When copying the data, include 4 bytes of the ethernet header to
		   align the same way hardware does */
		memcpy(work->packet_data, m->data + 10, sizeof(work->packet_data));
	} else {
		#if 0
		work->word2.snoip.vlan_valid  = 0; /* FIXME */
		work->word2.snoip.vlan_cfi    = 0; /* FIXME */
		work->word2.snoip.vlan_id     = 0; /* FIXME */
		work->word2.snoip.software    = 0; /* Hardware would set to zero */
		#endif
		work->word2.snoip.is_rarp       = m->protocol == htons(ETH_P_RARP);
		work->word2.snoip.is_arp        = m->protocol == htons(ETH_P_ARP);
		work->word2.snoip.is_bcast      = (m->pkt_type == PACKET_BROADCAST);
		work->word2.snoip.is_mcast      = (m->pkt_type == PACKET_MULTICAST);
		work->word2.snoip.not_IP        = 1; /* IP was done up above */
		#if 0
		work->word2.snoip.rcv_error   = 0; /* No error, packet is internal */
		work->word2.snoip.err_code    = 0; /* No error, packet is internal */
		#endif
		memcpy(work->packet_data, m->data, sizeof(work->packet_data));
	}
#endif

	/* Submit the packet to the POW */
	cvmx_pow_work_submit(work, work->tag, work->tag_type, work->qos, work->grp);
	ifp->if_opackets++;
	ifp->if_obytes += m->m_pkthdr.len;
	m_freem(m);
	return 0;
}


/**
 * This function frees all mbufs that are currently queued for TX.
 *
 * @param ifp    Device being shut down
 */
void cvm_oct_tx_shutdown(struct ifnet *ifp)
{
	cvm_oct_private_t *priv = (cvm_oct_private_t *)ifp->if_softc;
	int qos;

	for (qos = 0; qos < 16; qos++) {
		IF_DRAIN(&priv->tx_free_queue[qos]);
	}
}