/* uipc_mvec.c revision 168736 */
1/************************************************************************** 2 * 3 * Copyright (c) 2007, Kip Macy kmacy@freebsd.org 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright notice, 10 * this list of conditions and the following disclaimer. 11 * 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 
27 * 28 * 29 ***************************************************************************/ 30 31#include <sys/cdefs.h> 32__FBSDID("$FreeBSD: head/sys/dev/cxgb/sys/uipc_mvec.c 168736 2007-04-14 20:38:38Z kmacy $"); 33 34#include <sys/param.h> 35#include <sys/systm.h> 36#include <sys/kernel.h> 37#include <sys/lock.h> 38#include <sys/malloc.h> 39#include <sys/mbuf.h> 40#include <sys/ktr.h> 41#include <sys/sf_buf.h> 42 43#include <machine/bus.h> 44#include <dev/cxgb/sys/mvec.h> 45 46#include "opt_zero.h" 47#ifdef ZERO_COPY_SOCKETS 48#error "ZERO_COPY_SOCKETS not supported with mvec" 49#endif 50 51#ifdef DEBUG 52#define DPRINTF printf 53#else 54#define DPRINTF(...) 55#endif 56 57#ifdef INVARIANTS 58#define M_SANITY m_sanity 59#else 60#define M_SANITY(a, b) 61#endif 62 63#define MAX_BUFS 36 64#define MAX_HVEC 8 65 66extern uint32_t collapse_free; 67extern uint32_t mb_free_vec_free; 68 69struct mbuf_ext { 70 struct mbuf *me_m; 71 caddr_t me_base; 72 volatile u_int *me_refcnt; 73 int me_flags; 74 uint32_t me_offset; 75}; 76 77int 78_m_explode(struct mbuf *m) 79{ 80 int i, offset, type, first, len; 81 uint8_t *cl; 82 struct mbuf *m0, *head = NULL; 83 struct mbuf_vec *mv; 84 85#ifdef INVARIANTS 86 len = m->m_len; 87 m0 = m->m_next; 88 while (m0) { 89 KASSERT((m0->m_flags & M_PKTHDR) == 0, 90 ("pkthdr set on intermediate mbuf - pre")); 91 len += m0->m_len; 92 m0 = m0->m_next; 93 94 } 95 if (len != m->m_pkthdr.len) 96 panic("at start len=%d pktlen=%d", len, m->m_pkthdr.len); 97#endif 98 mv = mtomv(m); 99 first = mv->mv_first; 100 for (i = mv->mv_count + first - 1; i > first; i--) { 101 type = mbuf_vec_get_type(mv, i); 102 cl = mv->mv_vec[i].mi_base; 103 offset = mv->mv_vec[i].mi_offset; 104 len = mv->mv_vec[i].mi_len; 105 if (__predict_false(type == EXT_MBUF)) { 106 m0 = (struct mbuf *)cl; 107 KASSERT((m0->m_flags & M_EXT) == 0, ("M_EXT set on mbuf")); 108 m0->m_len = len; 109 m0->m_data = cl + offset; 110 goto skip_cluster; 111 112 } else if ((m0 = m_get(M_NOWAIT, MT_DATA)) == 
NULL) { 113 /* 114 * Check for extra memory leaks 115 */ 116 m_freem(head); 117 return (ENOMEM); 118 } 119 m0->m_flags = 0; 120 121 m_cljset(m0, (uint8_t *)cl, type); 122 m0->m_len = mv->mv_vec[i].mi_len; 123 if (offset) 124 m_adj(m0, offset); 125 skip_cluster: 126 m0->m_next = head; 127 m->m_len -= m0->m_len; 128 head = m0; 129 } 130 offset = mv->mv_vec[first].mi_offset; 131 cl = mv->mv_vec[first].mi_base; 132 type = mbuf_vec_get_type(mv, first); 133 m->m_flags &= ~(M_IOVEC); 134 m_cljset(m, cl, type); 135 if (offset) 136 m_adj(m, offset); 137 m->m_next = head; 138 head = m; 139 M_SANITY(m, 0); 140#ifdef INVARIANTS 141 len = head->m_len; 142 m = m->m_next; 143 while (m) { 144 KASSERT((m->m_flags & M_PKTHDR) == 0, 145 ("pkthdr set on intermediate mbuf - post")); 146 len += m->m_len; 147 m = m->m_next; 148 149 } 150 if (len != head->m_pkthdr.len) 151 panic("len=%d pktlen=%d", len, head->m_pkthdr.len); 152#endif 153 return (0); 154} 155 156static __inline int 157m_vectorize(struct mbuf *m, int max, struct mbuf **vec, int *count) 158{ 159 int i, error = 0; 160 161 for (i = 0; i < max; i++) { 162 if (m == NULL) 163 break; 164#ifndef PACKET_ZONE_DISABLED 165 if ((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_PACKET)) 166 return (EINVAL); 167#endif 168 if (m->m_len == 0) 169 DPRINTF("m=%p is len=0\n", m); 170 M_SANITY(m, 0); 171 vec[i] = m; 172 m = m->m_next; 173 } 174 if (m) 175 error = EFBIG; 176 177 *count = i; 178 179 return (error); 180} 181 182static __inline int 183m_findmbufs(struct mbuf **ivec, int maxbufs, struct mbuf_ext *ovec, int osize, int *ocount) 184{ 185 int i, j, nhbufsneed, nhbufs; 186 struct mbuf *m; 187 188 nhbufsneed = min(((maxbufs - 1)/MAX_MBUF_IOV) + 1, osize); 189 ovec[0].me_m = NULL; 190 191 for (nhbufs = j = i = 0; i < maxbufs && nhbufs < nhbufsneed; i++) { 192 if ((ivec[i]->m_flags & M_EXT) == 0) 193 continue; 194 m = ivec[i]; 195 ovec[nhbufs].me_m = m; 196 ovec[nhbufs].me_base = m->m_ext.ext_buf; 197 ovec[nhbufs].me_refcnt = 
m->m_ext.ref_cnt; 198 ovec[nhbufs].me_offset = (m->m_data - m->m_ext.ext_buf); 199 ovec[nhbufs].me_flags = m->m_ext.ext_type; 200 nhbufs++; 201 } 202 if (nhbufs == 0) { 203 if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) 204 goto m_getfail; 205 ovec[nhbufs].me_m = m; 206 nhbufs = 1; 207 } 208 while (nhbufs < nhbufsneed) { 209 if ((m = m_get(M_NOWAIT, MT_DATA)) == NULL) 210 goto m_getfail; 211 ovec[nhbufs].me_m = m; 212 nhbufs++; 213 } 214 /* 215 * Copy over packet header to new head of chain 216 */ 217 if (ovec[0].me_m != ivec[0]) { 218 ovec[0].me_m->m_flags |= M_PKTHDR; 219 memcpy(&ovec[0].me_m->m_pkthdr, &ivec[0]->m_pkthdr, sizeof(struct pkthdr)); 220 SLIST_INIT(&ivec[0]->m_pkthdr.tags); 221 } 222 *ocount = nhbufs; 223 return (0); 224m_getfail: 225 for (i = 0; i < nhbufs; i++) 226 if ((ovec[i].me_m->m_flags & M_EXT) == 0) 227 uma_zfree(zone_mbuf, ovec[i].me_m); 228 return (ENOMEM); 229 230} 231 232static __inline void 233m_setiovec(struct mbuf_iovec *mi, struct mbuf *m, struct mbuf_ext *extvec, int *me_index, 234 int max_me_index) 235{ 236 int idx = *me_index; 237 238 mi->mi_len = m->m_len; 239 if (idx < max_me_index && extvec[idx].me_m == m) { 240 struct mbuf_ext *me = &extvec[idx]; 241 (*me_index)++; 242 mi->mi_base = me->me_base; 243 mi->mi_refcnt = me->me_refcnt; 244 mi->mi_offset = me->me_offset; 245 mi->mi_flags = me->me_flags; 246 } else if (m->m_flags & M_EXT) { 247 mi->mi_base = m->m_ext.ext_buf; 248 mi->mi_refcnt = m->m_ext.ref_cnt; 249 mi->mi_offset = 250 (m->m_data - m->m_ext.ext_buf); 251 mi->mi_flags = m->m_ext.ext_type; 252 } else { 253 KASSERT(m->m_len < 256, ("mbuf too large len=%d", 254 m->m_len)); 255 mi->mi_base = (uint8_t *)m; 256 mi->mi_refcnt = NULL; 257 mi->mi_offset = 258 (m->m_data - (caddr_t)m); 259 mi->mi_flags = EXT_MBUF; 260 } 261 DPRINTF("type=%d len=%d refcnt=%p cl=%p offset=0x%x\n", 262 mi->mi_flags, mi->mi_len, mi->mi_refcnt, mi->mi_base, 263 mi->mi_offset); 264} 265 266int 267_m_collapse(struct mbuf *m, int maxbufs, struct mbuf 
**mnew) 268{ 269 struct mbuf *m0, *lmvec[MAX_BUFS]; 270 struct mbuf **mnext; 271 struct mbuf **vec = &lmvec[0]; 272 struct mbuf *mhead = NULL; 273 struct mbuf_vec *mv; 274 int err, i, j, max, len, nhbufs; 275 struct mbuf_ext dvec[MAX_HVEC]; 276 int hidx = 0, dvecidx; 277 278 M_SANITY(m, 0); 279 if (maxbufs > MAX_BUFS) { 280 if ((vec = malloc(maxbufs * sizeof(struct mbuf *), 281 M_DEVBUF, M_NOWAIT)) == NULL) 282 return (ENOMEM); 283 } 284 285 if ((err = m_vectorize(m, maxbufs, vec, &max)) != 0) 286 return (err); 287 if ((err = m_findmbufs(vec, max, dvec, MAX_HVEC, &nhbufs)) != 0) 288 return (err); 289 290 KASSERT(max > 0, ("invalid mbuf count")); 291 KASSERT(nhbufs > 0, ("invalid header mbuf count")); 292 293 mhead = m0 = dvec[0].me_m; 294 295 DPRINTF("nbufs=%d nhbufs=%d\n", max, nhbufs); 296 for (hidx = dvecidx = i = 0, mnext = NULL; i < max; hidx++) { 297 m0 = dvec[hidx].me_m; 298 m0->m_flags &= ~M_EXT; 299 m0->m_flags |= M_IOVEC; 300 301 if (mnext) 302 *mnext = m0; 303 304 mv = mtomv(m0); 305 len = mv->mv_first = 0; 306 for (j = 0; j < MAX_MBUF_IOV && i < max; j++, i++) { 307 struct mbuf_iovec *mi = &mv->mv_vec[j]; 308 309 m_setiovec(mi, vec[i], dvec, &dvecidx, nhbufs); 310 len += mi->mi_len; 311 } 312 m0->m_data = mv->mv_vec[0].mi_base + mv->mv_vec[0].mi_offset; 313 mv->mv_count = j; 314 m0->m_len = len; 315 mnext = &m0->m_next; 316 DPRINTF("count=%d len=%d\n", j, len); 317 } 318 319 /* 320 * Terminate chain 321 */ 322 m0->m_next = NULL; 323 324 /* 325 * Free all mbufs not used by the mbuf iovec chain 326 */ 327 for (i = 0; i < max; i++) 328 if (vec[i]->m_flags & M_EXT) { 329 vec[i]->m_flags &= ~M_EXT; 330 collapse_free++; 331 uma_zfree(zone_mbuf, vec[i]); 332 } 333 334 *mnew = mhead; 335 return (0); 336} 337 338void 339mb_free_vec(struct mbuf *m) 340{ 341 struct mbuf_vec *mv; 342 int i; 343 344 KASSERT((m->m_flags & (M_EXT|M_IOVEC)) == M_IOVEC, 345 ("%s: M_EXT set", __func__)); 346 347 mv = mtomv(m); 348 KASSERT(mv->mv_count <= MAX_MBUF_IOV, 349 ("%s: mi_count 
too large %d", __func__, mv->mv_count)); 350 351 DPRINTF("count=%d len=%d\n", mv->mv_count, m->m_len); 352 for (i = mv->mv_first; i < mv->mv_count; i++) { 353 uma_zone_t zone = NULL; 354 volatile int *refcnt = mv->mv_vec[i].mi_refcnt; 355 int type = mbuf_vec_get_type(mv, i); 356 void *cl = mv->mv_vec[i].mi_base; 357 358 if (refcnt && *refcnt != 1 && atomic_fetchadd_int(refcnt, -1) != 1) 359 continue; 360 361 DPRINTF("freeing idx=%d refcnt=%p type=%d cl=%p\n", i, refcnt, type, cl); 362 switch (type) { 363 case EXT_MBUF: 364 mb_free_vec_free++; 365 case EXT_CLUSTER: 366 case EXT_JUMBOP: 367 case EXT_JUMBO9: 368 case EXT_JUMBO16: 369 zone = m_getzonefromtype(type); 370 uma_zfree(zone, cl); 371 continue; 372 case EXT_SFBUF: 373 *refcnt = 0; 374 uma_zfree(zone_ext_refcnt, __DEVOLATILE(u_int *, 375 refcnt)); 376#ifdef __i386__ 377 sf_buf_mext(cl, mv->mv_vec[i].mi_args); 378#else 379 /* 380 * Every architecture other than i386 uses a vm_page 381 * for an sf_buf (well ... sparc64 does but shouldn't) 382 */ 383 sf_buf_mext(cl, PHYS_TO_VM_PAGE(vtophys(cl))); 384#endif 385 continue; 386 default: 387 KASSERT(m->m_ext.ext_type == 0, 388 ("%s: unknown ext_type", __func__)); 389 break; 390 } 391 } 392 /* 393 * Free this mbuf back to the mbuf zone with all iovec 394 * information purged. 
395 */ 396 mb_free_vec_free++; 397 uma_zfree(zone_mbuf, m); 398} 399 400#if (!defined(__sparc64__) && !defined(__sun4v__)) 401struct mvec_sg_cb_arg { 402 bus_dma_segment_t *segs; 403 int error; 404 int index; 405 int nseg; 406}; 407 408struct bus_dma_tag { 409 bus_dma_tag_t parent; 410 bus_size_t alignment; 411 bus_size_t boundary; 412 bus_addr_t lowaddr; 413 bus_addr_t highaddr; 414 bus_dma_filter_t *filter; 415 void *filterarg; 416 bus_size_t maxsize; 417 u_int nsegments; 418 bus_size_t maxsegsz; 419 int flags; 420 int ref_count; 421 int map_count; 422 bus_dma_lock_t *lockfunc; 423 void *lockfuncarg; 424 bus_dma_segment_t *segments; 425 struct bounce_zone *bounce_zone; 426}; 427 428static void 429mvec_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 430{ 431 struct mvec_sg_cb_arg *cb_arg = arg; 432 433 cb_arg->error = error; 434 cb_arg->segs[cb_arg->index] = segs[0]; 435 cb_arg->nseg = nseg; 436 KASSERT(nseg == 1, ("nseg=%d", nseg)); 437} 438 439int 440bus_dmamap_load_mvec_sg(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m0, 441 bus_dma_segment_t *segs, int *nsegs, int flags) 442{ 443 int error; 444 struct mvec_sg_cb_arg cb_arg; 445 446 M_ASSERTPKTHDR(m0); 447 448 if ((m0->m_flags & M_IOVEC) == 0) 449 return (bus_dmamap_load_mbuf_sg(dmat, map, m0, segs, nsegs, flags)); 450 451 flags |= BUS_DMA_NOWAIT; 452 *nsegs = 0; 453 error = 0; 454 if (m0->m_pkthdr.len <= dmat->maxsize) { 455 struct mbuf *m; 456 cb_arg.segs = segs; 457 for (m = m0; m != NULL && error == 0; m = m->m_next) { 458 struct mbuf_vec *mv; 459 int count, first, i; 460 if (!(m->m_len > 0)) 461 continue; 462 463 mv = mtomv(m); 464 count = mv->mv_count; 465 first = mv->mv_first; 466 KASSERT(count <= MAX_MBUF_IOV, ("count=%d too large", count)); 467 for (i = first; i < count; i++) { 468 void *data = mv->mv_vec[i].mi_base + mv->mv_vec[i].mi_offset; 469 int size = mv->mv_vec[i].mi_len; 470 471 if (size == 0) 472 continue; 473 DPRINTF("mapping data=%p size=%d\n", data, size); 474 cb_arg.index 
= *nsegs; 475 error = bus_dmamap_load(dmat, map, 476 data, size, mvec_cb, &cb_arg, flags); 477 (*nsegs)++; 478 479 if (*nsegs >= dmat->nsegments) { 480 DPRINTF("*nsegs=%d dmat->nsegments=%d index=%d\n", 481 *nsegs, dmat->nsegments, cb_arg.index); 482 error = EFBIG; 483 goto err_out; 484 } 485 if (error || cb_arg.error) 486 goto err_out; 487 } 488 } 489 } else { 490 error = EINVAL; 491 } 492 CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d", 493 __func__, dmat, dmat->flags, error, *nsegs); 494 return (error); 495 496err_out: 497 if (cb_arg.error) 498 return (cb_arg.error); 499 500 return (error); 501} 502#endif /* !__sparc64__ && !__sun4v__ */ 503