bge_recv2.c revision 8275:7c223a798022
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#include "bge_impl.h"
28
/*
 * Explicit conversions between a 32-bit integer and a pointer.
 *
 * U32TOPTR narrows its argument to uint32_t, widens that to uintptr_t
 * and then converts to (void *).  PTRTOU32 goes the other way and keeps
 * only the low 32 bits of the pointer value.  Neither macro is referenced
 * in the receive routines below.
 */
#define	U32TOPTR(x)	((void *)(uintptr_t)(uint32_t)(x))
#define	PTRTOU32(x)	((uint32_t)(uintptr_t)(void *)(x))

/*
 * ========== RX side routines ==========
 */

#define	BGE_DBG		BGE_DBG_RECV	/* debug flag for this code	*/
37
38static void bge_refill(bge_t *bgep, buff_ring_t *brp, sw_rbd_t *srbdp);
39#pragma	inline(bge_refill)
40
41/*
42 * Return the specified buffer (srbdp) to the ring it came from (brp).
43 *
44 * Note:
45 *	If the driver is compiled with only one buffer ring *and* one
46 *	return ring, then the buffers must be returned in sequence.
47 *	In this case, we don't have to consider anything about the
48 *	buffer at all; we can simply advance the cyclic counter.  And
49 *	we don't even need the refill mutex <rf_lock>, as the caller
50 *	will already be holding the (one-and-only) <rx_lock>.
51 *
52 *	If the driver supports multiple buffer rings, but only one
53 *	return ring, the same still applies (to each buffer ring
54 *	separately).
55 */
56static void
57bge_refill(bge_t *bgep, buff_ring_t *brp, sw_rbd_t *srbdp)
58{
59	uint64_t slot;
60
61	_NOTE(ARGUNUSED(srbdp))
62
63	slot = brp->rf_next;
64	brp->rf_next = NEXT(slot, brp->desc.nslots);
65	bge_mbx_put(bgep, brp->chip_mbx_reg, slot);
66}
67
68static mblk_t *bge_receive_packet(bge_t *bgep, bge_rbd_t *hw_rbd_p);
69#pragma	inline(bge_receive_packet)
70
71static mblk_t *
72bge_receive_packet(bge_t *bgep, bge_rbd_t *hw_rbd_p)
73{
74	bge_rbd_t hw_rbd;
75	buff_ring_t *brp;
76	sw_rbd_t *srbdp;
77	uchar_t *dp;
78	mblk_t *mp;
79	uint_t len;
80	uint_t minsize;
81	uint_t maxsize;
82	uint32_t pflags;
83
84	mp = NULL;
85	hw_rbd = *hw_rbd_p;
86
87	switch (hw_rbd.flags & (RBD_FLAG_MINI_RING|RBD_FLAG_JUMBO_RING)) {
88	case RBD_FLAG_MINI_RING|RBD_FLAG_JUMBO_RING:
89	default:
90		/* error, this shouldn't happen */
91		BGE_PKTDUMP((bgep, &hw_rbd, NULL, "bad ring flags!"));
92		goto error;
93
94	case RBD_FLAG_JUMBO_RING:
95		brp = &bgep->buff[BGE_JUMBO_BUFF_RING];
96		break;
97
98#if	(BGE_BUFF_RINGS_USED > 2)
99	case RBD_FLAG_MINI_RING:
100		brp = &bgep->buff[BGE_MINI_BUFF_RING];
101		break;
102#endif	/* BGE_BUFF_RINGS_USED > 2 */
103
104	case 0:
105		brp = &bgep->buff[BGE_STD_BUFF_RING];
106		break;
107	}
108
109	if (hw_rbd.index >= brp->desc.nslots) {
110		/* error, this shouldn't happen */
111		BGE_PKTDUMP((bgep, &hw_rbd, NULL, "bad ring index!"));
112		goto error;
113	}
114
115	srbdp = &brp->sw_rbds[hw_rbd.index];
116	if (hw_rbd.opaque != srbdp->pbuf.token) {
117		/* bogus, drop the packet */
118		BGE_PKTDUMP((bgep, &hw_rbd, srbdp, "bad ring token"));
119		goto refill;
120	}
121
122	if ((hw_rbd.flags & RBD_FLAG_PACKET_END) == 0) {
123		/* bogus, drop the packet */
124		BGE_PKTDUMP((bgep, &hw_rbd, srbdp, "unterminated packet"));
125		goto refill;
126	}
127
128	if (hw_rbd.flags & RBD_FLAG_FRAME_HAS_ERROR) {
129		/* bogus, drop the packet */
130		BGE_PKTDUMP((bgep, &hw_rbd, srbdp, "errored packet"));
131		goto refill;
132	}
133
134	len = hw_rbd.len;
135
136#ifdef BGE_IPMI_ASF
137	/*
138	 * When IPMI/ASF is enabled, VLAN tag must be stripped.
139	 */
140	if (bgep->asf_enabled && (hw_rbd.flags & RBD_FLAG_VLAN_TAG))
141		maxsize = bgep->chipid.ethmax_size + ETHERFCSL;
142	else
143#endif
144		/*
145		 * H/W will not strip the VLAN tag from incoming packet
146		 * now, as RECEIVE_MODE_KEEP_VLAN_TAG bit is set in
147		 * RECEIVE_MAC_MODE_REG register.
148		 */
149		maxsize = bgep->chipid.ethmax_size + VLAN_TAGSZ + ETHERFCSL;
150	if (len > maxsize) {
151		/* bogus, drop the packet */
152		BGE_PKTDUMP((bgep, &hw_rbd, srbdp, "oversize packet"));
153		goto refill;
154	}
155
156#ifdef BGE_IPMI_ASF
157	if (bgep->asf_enabled && (hw_rbd.flags & RBD_FLAG_VLAN_TAG))
158		minsize = ETHERMIN + ETHERFCSL - VLAN_TAGSZ;
159	else
160#endif
161		minsize = ETHERMIN + ETHERFCSL;
162	if (len < minsize) {
163		/* bogus, drop the packet */
164		BGE_PKTDUMP((bgep, &hw_rbd, srbdp, "undersize packet"));
165		goto refill;
166	}
167
168	/*
169	 * Packet looks good; get a buffer to copy it into.
170	 * We want to leave some space at the front of the allocated
171	 * buffer in case any upstream modules want to prepend some
172	 * sort of header.  This also has the side-effect of making
173	 * the packet *contents* 4-byte aligned, as required by NCA!
174	 */
175#ifdef BGE_IPMI_ASF
176	if (bgep->asf_enabled && (hw_rbd.flags & RBD_FLAG_VLAN_TAG)) {
177		mp = allocb(BGE_HEADROOM + len + VLAN_TAGSZ, 0);
178	} else {
179#endif
180
181		mp = allocb(BGE_HEADROOM + len, 0);
182#ifdef BGE_IPMI_ASF
183	}
184#endif
185	if (mp == NULL) {
186		/* Nothing to do but drop the packet */
187		goto refill;
188	}
189
190	/*
191	 * Sync the data and copy it to the STREAMS buffer.
192	 */
193	DMA_SYNC(srbdp->pbuf, DDI_DMA_SYNC_FORKERNEL);
194	if (bge_check_dma_handle(bgep, srbdp->pbuf.dma_hdl) != DDI_FM_OK) {
195		bgep->bge_dma_error = B_TRUE;
196		bgep->bge_chip_state = BGE_CHIP_ERROR;
197		return (NULL);
198	}
199#ifdef BGE_IPMI_ASF
200	if (bgep->asf_enabled && (hw_rbd.flags & RBD_FLAG_VLAN_TAG)) {
201		/*
202		 * As VLAN tag has been stripped from incoming packet in ASF
203		 * scenario, we insert it into this packet again.
204		 */
205		struct ether_vlan_header *ehp;
206		mp->b_rptr = dp = mp->b_rptr + BGE_HEADROOM - VLAN_TAGSZ;
207		bcopy(DMA_VPTR(srbdp->pbuf), dp, 2 * ETHERADDRL);
208		ehp = (void *)dp;
209		ehp->ether_tpid = ntohs(ETHERTYPE_VLAN);
210		ehp->ether_tci = ntohs(hw_rbd.vlan_tci);
211		bcopy(((uchar_t *)(DMA_VPTR(srbdp->pbuf))) + 2 * ETHERADDRL,
212		    dp + 2 * ETHERADDRL + VLAN_TAGSZ,
213		    len - 2 * ETHERADDRL);
214	} else {
215#endif
216		mp->b_rptr = dp = mp->b_rptr + BGE_HEADROOM;
217		bcopy(DMA_VPTR(srbdp->pbuf), dp, len);
218#ifdef BGE_IPMI_ASF
219	}
220
221	if (bgep->asf_enabled && (hw_rbd.flags & RBD_FLAG_VLAN_TAG)) {
222		mp->b_wptr = dp + len + VLAN_TAGSZ - ETHERFCSL;
223	} else
224#endif
225		mp->b_wptr = dp + len - ETHERFCSL;
226
227	/*
228	 * Special check for one specific type of data corruption;
229	 * in a good packet, the first 8 bytes are *very* unlikely
230	 * to be the same as the second 8 bytes ... but we let the
231	 * packet through just in case.
232	 */
233	if (bcmp(dp, dp+8, 8) == 0)
234		BGE_PKTDUMP((bgep, &hw_rbd, srbdp, "stuttered packet?"));
235
236	pflags = 0;
237	if (hw_rbd.flags & RBD_FLAG_TCP_UDP_CHECKSUM)
238		pflags |= HCK_FULLCKSUM;
239	if (hw_rbd.flags & RBD_FLAG_IP_CHECKSUM)
240		pflags |= HCK_IPV4_HDRCKSUM;
241	if (pflags != 0)
242		(void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0,
243		    hw_rbd.tcp_udp_cksum, pflags, 0);
244
245refill:
246	/*
247	 * Replace the buffer in the ring it came from ...
248	 */
249	bge_refill(bgep, brp, srbdp);
250	return (mp);
251
252error:
253	/*
254	 * We come here if the integrity of the ring descriptors
255	 * (rather than merely packet data) appears corrupted.
256	 * The factotum will attempt to reset-and-recover.
257	 */
258	bgep->bge_chip_state = BGE_CHIP_ERROR;
259	bge_fm_ereport(bgep, DDI_FM_DEVICE_INVAL_STATE);
260	return (NULL);
261}
262
263/*
264 * Accept the packets received in the specified ring up to
265 * (but not including) the producer index in the status block.
266 *
267 * Returns a chain of mblks containing the received data, to be
268 * passed up to gld_recv() (we can't call gld_recv() from here,
269 * 'cos we're holding the per-ring receive lock at this point).
270 *
271 * This function must advance (rrp->rx_next) and write it back to
272 * the chip to indicate the packets it has accepted from the ring.
273 */
274static mblk_t *bge_receive_ring(bge_t *bgep, recv_ring_t *rrp);
275#ifndef	DEBUG
276#pragma	inline(bge_receive_ring)
277#endif
278
279static mblk_t *
280bge_receive_ring(bge_t *bgep, recv_ring_t *rrp)
281{
282	bge_rbd_t *hw_rbd_p;
283	uint64_t slot;
284	mblk_t *head;
285	mblk_t **tail;
286	mblk_t *mp;
287	int recv_cnt = 0;
288
289	ASSERT(mutex_owned(rrp->rx_lock));
290
291	/*
292	 * Sync (all) the receive ring descriptors
293	 * before accepting the packets they describe
294	 */
295	DMA_SYNC(rrp->desc, DDI_DMA_SYNC_FORKERNEL);
296	if (*rrp->prod_index_p >= rrp->desc.nslots) {
297		bgep->bge_chip_state = BGE_CHIP_ERROR;
298		bge_fm_ereport(bgep, DDI_FM_DEVICE_INVAL_STATE);
299		return (NULL);
300	}
301	if (bge_check_dma_handle(bgep, rrp->desc.dma_hdl) != DDI_FM_OK) {
302		rrp->rx_next = *rrp->prod_index_p;
303		bge_mbx_put(bgep, rrp->chip_mbx_reg, rrp->rx_next);
304		bgep->bge_dma_error = B_TRUE;
305		bgep->bge_chip_state = BGE_CHIP_ERROR;
306		return (NULL);
307	}
308
309	hw_rbd_p = DMA_VPTR(rrp->desc);
310	head = NULL;
311	tail = &head;
312	slot = rrp->rx_next;
313
314	while ((slot != *rrp->prod_index_p) && /* Note: volatile	*/
315	    (recv_cnt < BGE_MAXPKT_RCVED)) {
316		if ((mp = bge_receive_packet(bgep, &hw_rbd_p[slot])) != NULL) {
317			*tail = mp;
318			tail = &mp->b_next;
319			recv_cnt++;
320		}
321		rrp->rx_next = slot = NEXT(slot, rrp->desc.nslots);
322	}
323
324	bge_mbx_put(bgep, rrp->chip_mbx_reg, rrp->rx_next);
325	if (bge_check_acc_handle(bgep, bgep->io_handle) != DDI_FM_OK)
326		bgep->bge_chip_state = BGE_CHIP_ERROR;
327	return (head);
328}
329
330/*
331 * XXX: Poll a particular ring. The implementation is incomplete.
332 * Once the ring interrupts are disabled, we need to do bge_recyle()
333 * for the ring as well and re enable the ring interrupt automatically
334 * if the poll doesn't find any packets in the ring. We need to
335 * have MSI-X interrupts support for this.
336 *
337 * The basic poll policy is that rings that are dealing with explicit
338 * flows (like TCP or some service) and are marked as such should
339 * have their own MSI-X interrupt per ring. bge_intr() should leave
340 * that interrupt disabled after an upcall. The ring is in poll mode.
341 * When a poll thread comes down and finds nothing, the MSI-X interrupt
342 * is automatically enabled. Squeue needs to deal with the race of
343 * a new interrupt firing and reaching before poll thread returns.
344 */
345mblk_t *
346bge_poll_ring(void *arg, int bytes_to_pickup)
347{
348	recv_ring_t *rrp = arg;
349	bge_t *bgep = rrp->bgep;
350	bge_rbd_t *hw_rbd_p;
351	uint64_t slot;
352	mblk_t *head;
353	mblk_t **tail;
354	mblk_t *mp;
355	size_t sz = 0;
356
357	mutex_enter(rrp->rx_lock);
358
359	/*
360	 * Sync (all) the receive ring descriptors
361	 * before accepting the packets they describe
362	 */
363	DMA_SYNC(rrp->desc, DDI_DMA_SYNC_FORKERNEL);
364	hw_rbd_p = DMA_VPTR(rrp->desc);
365	head = NULL;
366	tail = &head;
367	slot = rrp->rx_next;
368
369	/* Note: volatile */
370	while ((slot != *rrp->prod_index_p) && (sz <= bytes_to_pickup)) {
371		if ((mp = bge_receive_packet(bgep, &hw_rbd_p[slot])) != NULL) {
372			*tail = mp;
373			sz += msgdsize(mp);
374			tail = &mp->b_next;
375		}
376		rrp->rx_next = slot = NEXT(slot, rrp->desc.nslots);
377	}
378
379	bge_mbx_put(bgep, rrp->chip_mbx_reg, rrp->rx_next);
380	mutex_exit(rrp->rx_lock);
381	return (head);
382}
383
384/*
385 * Receive all packets in all rings.
386 */
387void bge_receive(bge_t *bgep, bge_status_t *bsp);
388#pragma	no_inline(bge_receive)
389
390void
391bge_receive(bge_t *bgep, bge_status_t *bsp)
392{
393	recv_ring_t *rrp;
394	uint64_t index;
395	mblk_t *mp;
396
397	for (index = 0; index < bgep->chipid.rx_rings; index++) {
398		/*
399		 * Start from the first ring.
400		 */
401		rrp = &bgep->recv[index];
402
403		/*
404		 * For each ring, (rrp->prod_index_p) points to the
405		 * proper index within the status block (which has
406		 * already been sync'd by the caller)
407		 */
408		ASSERT(rrp->prod_index_p == RECV_INDEX_P(bsp, index));
409
410		if (*rrp->prod_index_p == rrp->rx_next || rrp->poll_flag)
411			continue;		/* no packets		*/
412		if (mutex_tryenter(rrp->rx_lock) == 0)
413			continue;		/* already in process	*/
414		mp = bge_receive_ring(bgep, rrp);
415		mutex_exit(rrp->rx_lock);
416
417		if (mp != NULL)
418			mac_rx_ring(bgep->mh, rrp->ring_handle, mp,
419			    rrp->ring_gen_num);
420	}
421}
422