uipc_mvec.c revision 168736
1/**************************************************************************
2 *
3 * Copyright (c) 2007, Kip Macy kmacy@freebsd.org
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice,
10 *    this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 *
28 *
29 ***************************************************************************/
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD: head/sys/dev/cxgb/sys/uipc_mvec.c 168736 2007-04-14 20:38:38Z kmacy $");
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/kernel.h>
37#include <sys/lock.h>
38#include <sys/malloc.h>
39#include <sys/mbuf.h>
40#include <sys/ktr.h>
41#include <sys/sf_buf.h>
42
43#include <machine/bus.h>
44#include <dev/cxgb/sys/mvec.h>
45
46#include "opt_zero.h"
47#ifdef ZERO_COPY_SOCKETS
48#error "ZERO_COPY_SOCKETS not supported with mvec"
49#endif
50
51#ifdef DEBUG
52#define DPRINTF printf
53#else
54#define DPRINTF(...)
55#endif
56
57#ifdef INVARIANTS
58#define M_SANITY m_sanity
59#else
60#define M_SANITY(a, b)
61#endif
62
63#define MAX_BUFS 36
64#define MAX_HVEC 8
65
66extern uint32_t collapse_free;
67extern uint32_t mb_free_vec_free;
68
/*
 * Saved description of an mbuf's external storage, recorded while the
 * mbuf is selected as the head of an M_IOVEC chain so the storage can
 * be reattached as an iovec entry later.
 */
struct mbuf_ext {
	struct mbuf    *me_m;		/* mbuf the storage came from */
	caddr_t         me_base;	/* start of the external buffer */
	volatile u_int *me_refcnt;	/* external-storage reference count */
	int             me_flags;	/* buffer type (EXT_* value) */
	uint32_t        me_offset;	/* offset of valid data in the buffer */
};
76
/*
 * Expand an M_IOVEC mbuf back into a conventional mbuf chain: each
 * mbuf_iovec entry becomes its own mbuf (the embedded mbuf itself for
 * EXT_MBUF entries, or a freshly allocated mbuf with the cluster
 * reattached).  On success "m" is converted in place (M_IOVEC cleared,
 * the first entry's storage attached) and 0 is returned; on allocation
 * failure the partially built tail chain is freed and ENOMEM returned.
 */
int
_m_explode(struct mbuf *m)
{
        int i, offset, type, first, len;
        uint8_t *cl;
        struct mbuf *m0, *head = NULL;
        struct mbuf_vec *mv;

#ifdef INVARIANTS
	/* Verify the chain's aggregate length matches the packet header. */
	len = m->m_len;
	m0 = m->m_next;
	while (m0) {
		KASSERT((m0->m_flags & M_PKTHDR) == 0,
		    ("pkthdr set on intermediate mbuf - pre"));
		len += m0->m_len;
		m0 = m0->m_next;

	}
	if (len != m->m_pkthdr.len)
		panic("at start len=%d pktlen=%d", len, m->m_pkthdr.len);
#endif
        mv = mtomv(m);
	first = mv->mv_first;
	/*
	 * Walk the vector backwards so the chain can be built by
	 * prepending; stop short of the first entry, which is attached
	 * to "m" itself below.
	 */
        for (i = mv->mv_count + first - 1; i > first; i--) {
		type = mbuf_vec_get_type(mv, i);
                cl = mv->mv_vec[i].mi_base;
		offset = mv->mv_vec[i].mi_offset;
		len = mv->mv_vec[i].mi_len;
		if (__predict_false(type == EXT_MBUF)) {
			/* Entry is itself an mbuf - reuse it directly. */
			m0 = (struct mbuf *)cl;
			KASSERT((m0->m_flags & M_EXT) == 0, ("M_EXT set on mbuf"));
			m0->m_len = len;
			m0->m_data = cl + offset;
			goto skip_cluster;

		} else if ((m0 = m_get(M_NOWAIT, MT_DATA)) == NULL) {
			/*
			 * Check for extra memory leaks
			 */
			m_freem(head);
			return (ENOMEM);
                }
		m0->m_flags = 0;

		/* Attach the cluster and trim to the recorded window. */
		m_cljset(m0, (uint8_t *)cl, type);
		m0->m_len = mv->mv_vec[i].mi_len;
		if (offset)
			m_adj(m0, offset);
	skip_cluster:
		m0->m_next = head;
		m->m_len -= m0->m_len;
		head = m0;
	}
	/* Attach the first entry's storage to the original head mbuf. */
	offset = mv->mv_vec[first].mi_offset;
	cl = mv->mv_vec[first].mi_base;
	type = mbuf_vec_get_type(mv, first);
	m->m_flags &= ~(M_IOVEC);
	m_cljset(m, cl, type);
	if (offset)
		m_adj(m, offset);
	m->m_next = head;
	head = m;
	M_SANITY(m, 0);
#ifdef INVARIANTS
	/* Re-verify the rebuilt chain's length against the packet header. */
	len = head->m_len;
	m = m->m_next;
	while (m) {
		KASSERT((m->m_flags & M_PKTHDR) == 0,
		    ("pkthdr set on intermediate mbuf - post"));
		len += m->m_len;
		m = m->m_next;

	}
	if (len != head->m_pkthdr.len)
		panic("len=%d pktlen=%d", len, head->m_pkthdr.len);
#endif
	return (0);
}
155
156static __inline int
157m_vectorize(struct mbuf *m, int max, struct mbuf **vec, int *count)
158{
159	int i, error = 0;
160
161	for (i = 0; i < max; i++) {
162		if (m == NULL)
163			break;
164#ifndef PACKET_ZONE_DISABLED
165		if ((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_PACKET))
166			return (EINVAL);
167#endif
168		if (m->m_len == 0)
169			DPRINTF("m=%p is len=0\n", m);
170		M_SANITY(m, 0);
171		vec[i] = m;
172		m = m->m_next;
173	}
174	if (m)
175		error = EFBIG;
176
177	*count = i;
178
179	return (error);
180}
181
182static __inline int
183m_findmbufs(struct mbuf **ivec, int maxbufs, struct mbuf_ext *ovec, int osize, int *ocount)
184{
185	int i, j, nhbufsneed, nhbufs;
186	struct mbuf *m;
187
188	nhbufsneed = min(((maxbufs - 1)/MAX_MBUF_IOV) + 1, osize);
189	ovec[0].me_m = NULL;
190
191	for (nhbufs = j = i = 0; i < maxbufs && nhbufs < nhbufsneed; i++) {
192		if ((ivec[i]->m_flags & M_EXT) == 0)
193			continue;
194		m = ivec[i];
195		ovec[nhbufs].me_m = m;
196		ovec[nhbufs].me_base = m->m_ext.ext_buf;
197		ovec[nhbufs].me_refcnt = m->m_ext.ref_cnt;
198		ovec[nhbufs].me_offset = (m->m_data - m->m_ext.ext_buf);
199		ovec[nhbufs].me_flags = m->m_ext.ext_type;
200		nhbufs++;
201	}
202	if (nhbufs == 0) {
203		if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
204			goto m_getfail;
205		ovec[nhbufs].me_m = m;
206		nhbufs = 1;
207	}
208	while (nhbufs < nhbufsneed) {
209		if ((m = m_get(M_NOWAIT, MT_DATA)) == NULL)
210			goto m_getfail;
211		ovec[nhbufs].me_m = m;
212		nhbufs++;
213	}
214	/*
215	 * Copy over packet header to new head of chain
216	 */
217	if (ovec[0].me_m != ivec[0]) {
218		ovec[0].me_m->m_flags |= M_PKTHDR;
219		memcpy(&ovec[0].me_m->m_pkthdr, &ivec[0]->m_pkthdr, sizeof(struct pkthdr));
220		SLIST_INIT(&ivec[0]->m_pkthdr.tags);
221	}
222	*ocount = nhbufs;
223	return (0);
224m_getfail:
225	for (i = 0; i < nhbufs; i++)
226		if ((ovec[i].me_m->m_flags & M_EXT) == 0)
227			uma_zfree(zone_mbuf, ovec[i].me_m);
228	return (ENOMEM);
229
230}
231
232static __inline void
233m_setiovec(struct mbuf_iovec *mi, struct mbuf *m, struct mbuf_ext *extvec, int *me_index,
234    int max_me_index)
235{
236	int idx = *me_index;
237
238	mi->mi_len = m->m_len;
239	if (idx < max_me_index && extvec[idx].me_m == m) {
240		struct mbuf_ext *me = &extvec[idx];
241		(*me_index)++;
242		mi->mi_base = me->me_base;
243		mi->mi_refcnt = me->me_refcnt;
244		mi->mi_offset = me->me_offset;
245		mi->mi_flags = me->me_flags;
246	} else if (m->m_flags & M_EXT) {
247		mi->mi_base = m->m_ext.ext_buf;
248		mi->mi_refcnt = m->m_ext.ref_cnt;
249		mi->mi_offset =
250		    (m->m_data - m->m_ext.ext_buf);
251		mi->mi_flags = m->m_ext.ext_type;
252	} else {
253		KASSERT(m->m_len < 256, ("mbuf too large len=%d",
254			m->m_len));
255		mi->mi_base = (uint8_t *)m;
256		mi->mi_refcnt = NULL;
257		mi->mi_offset =
258		    (m->m_data - (caddr_t)m);
259		mi->mi_flags = EXT_MBUF;
260	}
261	DPRINTF("type=%d len=%d refcnt=%p cl=%p offset=0x%x\n",
262	    mi->mi_flags, mi->mi_len, mi->mi_refcnt, mi->mi_base,
263	    mi->mi_offset);
264}
265
266int
267_m_collapse(struct mbuf *m, int maxbufs, struct mbuf **mnew)
268{
269	struct mbuf *m0, *lmvec[MAX_BUFS];
270	struct mbuf **mnext;
271	struct mbuf **vec = &lmvec[0];
272	struct mbuf *mhead = NULL;
273	struct mbuf_vec *mv;
274	int err, i, j, max, len, nhbufs;
275	struct mbuf_ext dvec[MAX_HVEC];
276	int hidx = 0, dvecidx;
277
278	M_SANITY(m, 0);
279	if (maxbufs > MAX_BUFS) {
280		if ((vec = malloc(maxbufs * sizeof(struct mbuf *),
281			    M_DEVBUF, M_NOWAIT)) == NULL)
282			return (ENOMEM);
283	}
284
285	if ((err = m_vectorize(m, maxbufs, vec, &max)) != 0)
286		return (err);
287	if ((err = m_findmbufs(vec, max, dvec, MAX_HVEC, &nhbufs)) != 0)
288		return (err);
289
290	KASSERT(max > 0, ("invalid mbuf count"));
291	KASSERT(nhbufs > 0, ("invalid header mbuf count"));
292
293	mhead = m0 = dvec[0].me_m;
294
295	DPRINTF("nbufs=%d nhbufs=%d\n", max, nhbufs);
296	for (hidx = dvecidx = i = 0, mnext = NULL; i < max; hidx++) {
297		m0 = dvec[hidx].me_m;
298		m0->m_flags &= ~M_EXT;
299		m0->m_flags |= M_IOVEC;
300
301		if (mnext)
302			*mnext = m0;
303
304		mv = mtomv(m0);
305		len = mv->mv_first = 0;
306		for (j = 0; j < MAX_MBUF_IOV && i < max; j++, i++) {
307			struct mbuf_iovec *mi = &mv->mv_vec[j];
308
309			m_setiovec(mi, vec[i], dvec, &dvecidx, nhbufs);
310			len += mi->mi_len;
311		}
312		m0->m_data = mv->mv_vec[0].mi_base + mv->mv_vec[0].mi_offset;
313		mv->mv_count = j;
314		m0->m_len = len;
315		mnext = &m0->m_next;
316		DPRINTF("count=%d len=%d\n", j, len);
317	}
318
319	/*
320	 * Terminate chain
321	 */
322	m0->m_next = NULL;
323
324	/*
325	 * Free all mbufs not used by the mbuf iovec chain
326	 */
327	for (i = 0; i < max; i++)
328		if (vec[i]->m_flags & M_EXT) {
329			vec[i]->m_flags &= ~M_EXT;
330			collapse_free++;
331			uma_zfree(zone_mbuf, vec[i]);
332		}
333
334	*mnew = mhead;
335	return (0);
336}
337
/*
 * Release an M_IOVEC mbuf: drop a reference on (and, if last, free)
 * each piece of backing storage recorded in its mbuf_vec, then return
 * the mbuf itself to the mbuf zone.
 *
 * NOTE(review): this iterates [mv_first, mv_count) while _m_explode
 * iterates [mv_first, mv_first + mv_count); the two agree only while
 * mv_first == 0 (which _m_collapse always sets) - confirm.
 */
void
mb_free_vec(struct mbuf *m)
{
	struct mbuf_vec *mv;
	int i;

	KASSERT((m->m_flags & (M_EXT|M_IOVEC)) == M_IOVEC,
	    ("%s: M_EXT set", __func__));

	mv = mtomv(m);
	KASSERT(mv->mv_count <= MAX_MBUF_IOV,
	    ("%s: mi_count too large %d", __func__, mv->mv_count));

	DPRINTF("count=%d len=%d\n", mv->mv_count, m->m_len);
	for (i = mv->mv_first; i < mv->mv_count; i++) {
		uma_zone_t zone = NULL;
		volatile int *refcnt = mv->mv_vec[i].mi_refcnt;
		int type = mbuf_vec_get_type(mv, i);
		void *cl = mv->mv_vec[i].mi_base;

		/* Skip storage still referenced elsewhere. */
		if (refcnt && *refcnt != 1 && atomic_fetchadd_int(refcnt, -1) != 1)
			continue;

		DPRINTF("freeing idx=%d refcnt=%p type=%d cl=%p\n", i, refcnt, type, cl);
		switch (type) {
		case EXT_MBUF:
			mb_free_vec_free++;
			/* FALLTHROUGH: embedded mbufs also come from a zone */
		case EXT_CLUSTER:
		case EXT_JUMBOP:
		case EXT_JUMBO9:
		case EXT_JUMBO16:
			zone = m_getzonefromtype(type);
			uma_zfree(zone, cl);
			continue;
		case EXT_SFBUF:
			/* sendfile buffer: release refcount, then the sf_buf */
			*refcnt = 0;
			uma_zfree(zone_ext_refcnt, __DEVOLATILE(u_int *,
				refcnt));
#ifdef __i386__
			sf_buf_mext(cl, mv->mv_vec[i].mi_args);
#else
			/*
			 * Every architecture other than i386 uses a vm_page
			 * for an sf_buf (well ... sparc64 does but shouldn't)
			 */
			sf_buf_mext(cl, PHYS_TO_VM_PAGE(vtophys(cl)));
#endif
			continue;
		default:
			KASSERT(m->m_ext.ext_type == 0,
				("%s: unknown ext_type", __func__));
			break;
		}
	}
	/*
	 * Free this mbuf back to the mbuf zone with all iovec
	 * information purged.
	 */
	mb_free_vec_free++;
	uma_zfree(zone_mbuf, m);
}
399
400#if (!defined(__sparc64__) && !defined(__sun4v__))
/* State shared between bus_dmamap_load_mvec_sg() and its callback. */
struct mvec_sg_cb_arg {
	bus_dma_segment_t *segs;	/* caller's segment array */
	int error;			/* error reported by the callback */
	int index;			/* slot in segs[] to fill next */
	int nseg;			/* segment count from the last load */
};
407
/*
 * NOTE(review): private duplicate of the machine-dependent bus_dma_tag
 * layout, re-declared here only so that maxsize/nsegments/flags can be
 * read directly below.  This silently breaks if the MD definition in
 * busdma_machdep.c ever changes - verify it stays in sync.
 */
struct bus_dma_tag {
	bus_dma_tag_t	  parent;
	bus_size_t	  alignment;
	bus_size_t	  boundary;
	bus_addr_t	  lowaddr;
	bus_addr_t	  highaddr;
	bus_dma_filter_t *filter;
	void		 *filterarg;
	bus_size_t	  maxsize;
	u_int		  nsegments;
	bus_size_t	  maxsegsz;
	int		  flags;
	int		  ref_count;
	int		  map_count;
	bus_dma_lock_t	 *lockfunc;
	void		 *lockfuncarg;
	bus_dma_segment_t *segments;
	struct bounce_zone *bounce_zone;
};
427
428static void
429mvec_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
430{
431	struct mvec_sg_cb_arg *cb_arg = arg;
432
433	cb_arg->error = error;
434	cb_arg->segs[cb_arg->index] = segs[0];
435	cb_arg->nseg = nseg;
436	KASSERT(nseg == 1, ("nseg=%d", nseg));
437}
438
439int
440bus_dmamap_load_mvec_sg(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m0,
441                        bus_dma_segment_t *segs, int *nsegs, int flags)
442{
443	int error;
444	struct mvec_sg_cb_arg cb_arg;
445
446	M_ASSERTPKTHDR(m0);
447
448	if ((m0->m_flags & M_IOVEC) == 0)
449		return (bus_dmamap_load_mbuf_sg(dmat, map, m0, segs, nsegs, flags));
450
451	flags |= BUS_DMA_NOWAIT;
452	*nsegs = 0;
453	error = 0;
454	if (m0->m_pkthdr.len <= dmat->maxsize) {
455		struct mbuf *m;
456		cb_arg.segs = segs;
457		for (m = m0; m != NULL && error == 0; m = m->m_next) {
458			struct mbuf_vec *mv;
459			int count, first, i;
460			if (!(m->m_len > 0))
461				continue;
462
463			mv = mtomv(m);
464			count = mv->mv_count;
465			first = mv->mv_first;
466			KASSERT(count <= MAX_MBUF_IOV, ("count=%d too large", count));
467			for (i = first; i < count; i++) {
468				void *data = mv->mv_vec[i].mi_base + mv->mv_vec[i].mi_offset;
469				int size = mv->mv_vec[i].mi_len;
470
471				if (size == 0)
472					continue;
473				DPRINTF("mapping data=%p size=%d\n", data, size);
474				cb_arg.index = *nsegs;
475				error = bus_dmamap_load(dmat, map,
476				    data, size, mvec_cb, &cb_arg, flags);
477				(*nsegs)++;
478
479				if (*nsegs >= dmat->nsegments) {
480					DPRINTF("*nsegs=%d dmat->nsegments=%d index=%d\n",
481					    *nsegs, dmat->nsegments, cb_arg.index);
482					error = EFBIG;
483					goto err_out;
484				}
485				if (error || cb_arg.error)
486					goto err_out;
487			}
488		}
489	} else {
490		error = EINVAL;
491	}
492	CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d",
493	    __func__, dmat, dmat->flags, error, *nsegs);
494	return (error);
495
496err_out:
497	if (cb_arg.error)
498		return (cb_arg.error);
499
500	return (error);
501}
502#endif /* !__sparc64__  && !__sun4v__ */
503