uipc_mvec.c revision 175340
1/**************************************************************************
2 *
3 * Copyright (c) 2007, Kip Macy kmacy@freebsd.org
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice,
10 *    this list of conditions and the following disclaimer.
11 *
12 * 2. The name of Kip Macy nor the names of other
13 *    contributors may be used to endorse or promote products derived from
14 *    this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 *
28 *
29 ***************************************************************************/
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD: head/sys/dev/cxgb/sys/uipc_mvec.c 175340 2008-01-15 03:27:42Z kmacy $");
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/kernel.h>
37#include <sys/lock.h>
38#include <sys/malloc.h>
39#include <sys/mbuf.h>
40#include <sys/ktr.h>
41#include <sys/sf_buf.h>
42
43#include <vm/vm.h>
44#include <vm/pmap.h>
45
46#include <machine/bus.h>
47
48
49
50#ifdef CONFIG_DEFINED
51#include <cxgb_include.h>
52#include <sys/mvec.h>
53#else
54#include <dev/cxgb/cxgb_include.h>
55#include <dev/cxgb/sys/mvec.h>
56#endif
57
58#include "opt_zero.h"
59
60#include <vm/vm.h>
61#include <vm/vm_page.h>
62#include <vm/pmap.h>
63
64#ifdef INVARIANTS
65#define M_SANITY m_sanity
66#else
67#define M_SANITY(a, b)
68#endif
69
70#define MAX_BUFS 36
71#define MAX_HVEC 8
72
73extern uint32_t collapse_free;
74extern uint32_t mb_free_vec_free;
75
76uma_zone_t zone_miovec;
77static int mi_inited = 0;
78
79void
80mi_init(void)
81{
82	if (mi_inited > 0)
83		return;
84	else
85		mi_inited++;
86
87	zone_miovec = uma_zcreate("MBUF IOVEC", MIOVBYTES,
88	    NULL, NULL, NULL, NULL,
89	    UMA_ALIGN_PTR, UMA_ZONE_MAXBUCKET);
90}
91
92void
93mi_deinit(void)
94{
95	mi_inited--;
96	if (mi_inited == 0)
97		uma_zdestroy(zone_miovec);
98}
99
100void
101dump_mi(struct mbuf_iovec *mi)
102{
103	int i;
104	struct mbuf_vec *mv;
105
106	printf("mi_flags=0x%08x mi_base=%p mi_data=%p mi_len=%d mi_type=%d\n",
107	    mi->mi_flags, mi->mi_base, mi->mi_data, mi->mi_len, mi->mi_type);
108
109	if (mi->mi_type == EXT_CLIOVEC ||
110	    mi->mi_type == EXT_IOVEC) {
111		mv = mtomv((struct mbuf *)mi->mi_base);
112		mi = mv->mv_vec;
113		for (i = 0; i < mv->mv_count; i++, mi++)
114			dump_mi(mi);
115
116	}
117}
118
119static __inline struct mbuf *
120_mcl_collapse_mbuf(struct mbuf_iovec *mi, struct mbuf *m)
121{
122	struct mbuf *n = m->m_next;
123
124	prefetch(n);
125
126	mi->mi_flags = m->m_flags;
127	mi->mi_len = m->m_len;
128	mi->mi_mbuf = NULL;
129
130	if (m->m_flags & M_PKTHDR) {
131		mi->mi_ether_vtag = m->m_pkthdr.ether_vtag;
132		mi->mi_tso_segsz = m->m_pkthdr.tso_segsz;
133#ifdef IFNET_MULTIQ
134		mi->mi_rss_hash = m->m_pkthdr.rss_hash;
135#endif
136	}
137	if (m->m_type != MT_DATA) {
138		mi->mi_data = NULL;
139		mi->mi_base = (caddr_t)m;
140		/*
141		 * XXX JMPIOVEC
142		 */
143		mi->mi_size = (m->m_type == EXT_CLIOVEC) ? MCLBYTES : MIOVBYTES;
144		mi->mi_type = m->m_type;
145		mi->mi_len = m->m_pkthdr.len;
146		KASSERT(mi->mi_len, ("empty packet"));
147		mi->mi_refcnt = NULL;
148	} else if (m->m_flags & M_EXT) {
149		memcpy(&mi->mi_ext, &m->m_ext, sizeof(struct m_ext_));
150		mi->mi_data = m->m_data;
151		mi->mi_base = m->m_ext.ext_buf;
152		mi->mi_type = m->m_ext.ext_type;
153		mi->mi_size = m->m_ext.ext_size;
154		mi->mi_refcnt = m->m_ext.ref_cnt;
155		mi->mi_mbuf = m;
156	} else {
157		mi->mi_base = (caddr_t)m;
158		mi->mi_data = m->m_data;
159		mi->mi_size = MSIZE;
160		mi->mi_type = EXT_MBUF;
161		mi->mi_refcnt = NULL;
162	}
163	KASSERT(mi->mi_len != 0, ("miov has len 0"));
164	KASSERT(mi->mi_type > 0, ("mi_type is invalid"));
165	KASSERT(mi->mi_base, ("mi_base is invalid"));
166	return (n);
167}
168
/*
 * Externally visible wrapper around _mcl_collapse_mbuf(): fill 'mi' from
 * 'm' and hand back whatever the helper returns (the mbuf after 'm').
 */
struct mbuf *
mi_collapse_mbuf(struct mbuf_iovec *mi, struct mbuf *m)
{
	struct mbuf *next;

	next = _mcl_collapse_mbuf(mi, m);
	return (next);
}
174
175void *
176mcl_alloc(int seg_count, int *type)
177{
178	uma_zone_t zone;
179
180	if (seg_count > MAX_CL_IOV) {
181		zone = zone_jumbop;
182		*type = EXT_JMPIOVEC;
183	} else if (seg_count > MAX_MIOVEC_IOV) {
184		zone = zone_clust;
185		*type = EXT_CLIOVEC;
186	} else {
187		*type = EXT_IOVEC;
188		zone = zone_miovec;
189	}
190	return uma_zalloc_arg(zone, NULL, M_NOWAIT);
191}
192
193int
194busdma_map_sg_collapse(struct mbuf **m, bus_dma_segment_t *segs, int *nsegs)
195{
196	struct mbuf *m0, *mhead, *n = *m;
197	struct mbuf_iovec *mi;
198	struct mbuf *marray[TX_MAX_SEGS];
199	int i, type, seg_count, defragged = 0, err = 0;
200	struct mbuf_vec *mv;
201
202	KASSERT(n->m_pkthdr.len, ("packet has zero header len"));
203
204	if (n->m_flags & M_PKTHDR && !SLIST_EMPTY(&n->m_pkthdr.tags))
205		m_tag_delete_chain(n, NULL);
206
207	if (n->m_pkthdr.len <= PIO_LEN)
208		return (0);
209retry:
210	seg_count = 0;
211	if (n->m_next == NULL) {
212		busdma_map_mbuf_fast(n, segs);
213		*nsegs = 1;
214		return (0);
215	}
216	while (n && seg_count < TX_MAX_SEGS) {
217		marray[seg_count] = n;
218
219		/*
220		 * firmware doesn't like empty segments
221		 */
222		if (__predict_true(n->m_len != 0))
223			seg_count++;
224
225		n = n->m_next;
226	}
227	if (seg_count == 0) {
228		if (cxgb_debug)
229			printf("empty segment chain\n");
230		err = EFBIG;
231		goto err_out;
232	}  else if (seg_count >= TX_MAX_SEGS) {
233		if (cxgb_debug)
234			printf("mbuf chain too long: %d max allowed %d\n",
235			    seg_count, TX_MAX_SEGS);
236		if (!defragged) {
237			n = m_defrag(*m, M_DONTWAIT);
238			if (n == NULL) {
239				err = ENOBUFS;
240				goto err_out;
241			}
242			*m = n;
243			defragged = 1;
244			goto retry;
245		}
246		err = EFBIG;
247		goto err_out;
248	}
249
250	if ((m0 = mcl_alloc(seg_count, &type)) == NULL) {
251		err = ENOMEM;
252		goto err_out;
253	}
254
255	memcpy(m0, *m, sizeof(struct m_hdr) + sizeof(struct pkthdr));
256	m0->m_type = type;
257	KASSERT(m0->m_pkthdr.len, ("empty packet being marshalled"));
258	mv = mtomv(m0);
259	mv->mv_count = seg_count;
260	mv->mv_first = 0;
261	for (i = 0, mi = mv->mv_vec; i < seg_count; mi++, segs++, i++) {
262		n = marray[i];
263		busdma_map_mbuf_fast(n, segs);
264		_mcl_collapse_mbuf(mi, n);
265	}
266	n = *m;
267	while (n) {
268		if (n->m_ext.ext_type == EXT_PACKET)
269			goto skip;
270		else if (n->m_len == 0)
271			/* do nothing */;
272		else if ((n->m_flags & (M_EXT|M_NOFREE)) == M_EXT)
273			n->m_flags &= ~M_EXT;
274		else
275			goto skip;
276		mhead = n->m_next;
277		m_free(n);
278		n = mhead;
279		continue;
280	skip:
281		n = n->m_next;
282	}
283	*nsegs = seg_count;
284	*m = m0;
285	DPRINTF("pktlen=%d m0=%p *m=%p m=%p\n", m0->m_pkthdr.len, m0, *m, m);
286	return (0);
287err_out:
288	m_freem(*m);
289	*m = NULL;
290	return (err);
291}
292
293int
294busdma_map_sg_vec(struct mbuf **m, struct mbuf **mret, bus_dma_segment_t *segs, int count)
295{
296	struct mbuf *m0, **mp;
297	struct mbuf_iovec *mi;
298	struct mbuf_vec *mv;
299	int i;
300
301	if (count > MAX_MIOVEC_IOV) {
302		if ((m0 = uma_zalloc_arg(zone_clust, NULL, M_NOWAIT)) == NULL)
303			return (ENOMEM);
304		m0->m_type = EXT_CLIOVEC;
305	} else {
306		if ((m0 = uma_zalloc_arg(zone_miovec, NULL, M_NOWAIT)) == NULL)
307			return (ENOMEM);
308		m0->m_type = EXT_IOVEC;
309	}
310
311	m0->m_flags = 0;
312	m0->m_pkthdr.len = m0->m_len = (*m)->m_len; /* not the real length but needs to be non-zero */
313	mv = mtomv(m0);
314	mv->mv_count = count;
315	mv->mv_first = 0;
316	for (mp = m, i = 0, mi = mv->mv_vec; i < count; mp++, segs++, mi++, i++) {
317		if ((*mp)->m_flags & M_PKTHDR && !SLIST_EMPTY(&(*mp)->m_pkthdr.tags))
318			m_tag_delete_chain(*mp, NULL);
319		busdma_map_mbuf_fast(*mp, segs);
320		_mcl_collapse_mbuf(mi, *mp);
321		KASSERT(mi->mi_len, ("empty packet"));
322	}
323
324	for (mp = m, i = 0; i < count; i++, mp++) {
325		(*mp)->m_next = (*mp)->m_nextpkt = NULL;
326		if (((*mp)->m_flags & (M_EXT|M_NOFREE)) == M_EXT) {
327			(*mp)->m_flags &= ~M_EXT;
328			m_free(*mp);
329		}
330	}
331
332	*mret = m0;
333	return (0);
334}
335
336void
337mb_free_ext_fast(struct mbuf_iovec *mi, int type, int idx)
338{
339	u_int cnt;
340	int dofree;
341	caddr_t cl;
342
343	/* Account for lazy ref count assign. */
344	dofree = (mi->mi_refcnt == NULL);
345
346	/*
347	 * This is tricky.  We need to make sure to decrement the
348	 * refcount in a safe way but to also clean up if we're the
349	 * last reference.  This method seems to do it without race.
350	 */
351	while (dofree == 0) {
352		cnt = *(mi->mi_refcnt);
353		if (cnt == 1) {
354			dofree = 1;
355			break;
356		}
357		if (atomic_cmpset_int(mi->mi_refcnt, cnt, cnt - 1)) {
358			if (cnt == 1)
359				dofree = 1;
360			break;
361		}
362	}
363	if (dofree == 0)
364		return;
365
366	cl = mi->mi_base;
367	switch (type) {
368	case EXT_MBUF:
369		m_free_fast((struct mbuf *)cl);
370		break;
371	case EXT_CLUSTER:
372		cxgb_cache_put(zone_clust, cl);
373		break;
374	case EXT_JUMBOP:
375		cxgb_cache_put(zone_jumbop, cl);
376		break;
377	case EXT_JUMBO9:
378		cxgb_cache_put(zone_jumbo9, cl);
379		break;
380	case EXT_JUMBO16:
381		cxgb_cache_put(zone_jumbo16, cl);
382		break;
383	case EXT_SFBUF:
384	case EXT_NET_DRV:
385	case EXT_MOD_TYPE:
386	case EXT_DISPOSABLE:
387		*(mi->mi_refcnt) = 0;
388		uma_zfree(zone_ext_refcnt, __DEVOLATILE(u_int *,
389			mi->mi_ext.ref_cnt));
390		/* FALLTHROUGH */
391	case EXT_EXTREF:
392		KASSERT(mi->mi_ext.ext_free != NULL,
393		    ("%s: ext_free not set", __func__));
394		(*(mi->mi_ext.ext_free))(mi->mi_ext.ext_buf,
395		    mi->mi_ext.ext_args);
396		break;
397	case EXT_PACKET:
398		if (*(mi->mi_refcnt) == 0)
399			*(mi->mi_refcnt) = 1;
400		uma_zfree(zone_pack, mi->mi_mbuf);
401		break;
402	default:
403		dump_mi(mi);
404		panic("unknown mv type in m_free_vec type=%d idx=%d", type, idx);
405		break;
406	}
407}
408
/*
 * Placeholder: not implemented.  Calling it panics the kernel; the
 * argument is ignored.
 */
int
_m_explode(struct mbuf *m)
{
	(void)m;
	panic("IMPLEMENT ME!!!");
}
414
415
416