/**************************************************************************
 *
 * Copyright (c) 2007, Kip Macy kmacy@freebsd.org
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 ***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/cxgb/sys/uipc_mvec.c 168886 2007-04-20 05:06:02Z kmacy $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/ktr.h>
#include <sys/sf_buf.h>

#include <machine/bus.h>
#include <dev/cxgb/sys/mvec.h>
#include "opt_zero.h"

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>

#ifdef DEBUG
#define DPRINTF printf
#else
#define DPRINTF(...)
#endif
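
/*
 * "mvec" support: an mbuf flagged M_IOVEC stores an array of cluster
 * references (a struct mbuf_vec of up to MAX_MBUF_IOV mbuf_iovec
 * entries, see mvec.h) in its internal data area instead of packet
 * data.  _m_collapse() packs an ordinary mbuf chain into a short
 * chain of such iovec mbufs, _m_explode() expands an iovec mbuf back
 * into a conventional chain, and mb_free_vec() releases the storage
 * an iovec mbuf references.
 */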

#ifdef INVARIANTS
#define M_SANITY m_sanity
#else
#define M_SANITY(a, b)
#endif

#define MAX_BUFS 36
#define MAX_HVEC 8

extern uint32_t collapse_free;
extern uint32_t mb_free_vec_free;

struct mbuf_ext {
	struct mbuf	*me_m;
	caddr_t		 me_base;
	volatile u_int	*me_refcnt;
	int		 me_flags;
	uint32_t	 me_offset;
};

int
_m_explode(struct mbuf *m)
{
	int i, offset, type, first, len;
	uint8_t *cl;
	struct mbuf *m0, *head = NULL;
	struct mbuf_vec *mv;

#ifdef INVARIANTS
	len = m->m_len;
	m0 = m->m_next;
	while (m0) {
		KASSERT((m0->m_flags & M_PKTHDR) == 0,
		    ("pkthdr set on intermediate mbuf - pre"));
		len += m0->m_len;
		m0 = m0->m_next;
	}
	if (len != m->m_pkthdr.len)
		panic("at start len=%d pktlen=%d", len, m->m_pkthdr.len);
#endif
	mv = mtomv(m);
	first = mv->mv_first;
	for (i = mv->mv_count + first - 1; i > first; i--) {
		type = mbuf_vec_get_type(mv, i);
		cl = mv->mv_vec[i].mi_base;
		offset = mv->mv_vec[i].mi_offset;
		len = mv->mv_vec[i].mi_len;
		if (__predict_false(type == EXT_MBUF)) {
			m0 = (struct mbuf *)cl;
			KASSERT((m0->m_flags & M_EXT) == 0, ("M_EXT set on mbuf"));
			m0->m_len = len;
			m0->m_data = cl + offset;
			goto skip_cluster;
		} else if ((m0 = m_get(M_NOWAIT, MT_DATA)) == NULL) {
			/*
			 * Check for extra memory leaks
			 */
			m_freem(head);
			return (ENOMEM);
		}
		m0->m_flags = 0;

		m_cljset(m0, (uint8_t *)cl, type);
		m0->m_len = mv->mv_vec[i].mi_len;
		if (offset)
			m_adj(m0, offset);
	skip_cluster:
		m0->m_next = head;
		m->m_len -= m0->m_len;
		head = m0;
	}
	offset = mv->mv_vec[first].mi_offset;
	cl = mv->mv_vec[first].mi_base;
	type = mbuf_vec_get_type(mv, first);
	m->m_flags &= ~(M_IOVEC);
	m_cljset(m, cl, type);
	if (offset)
		m_adj(m, offset);
	m->m_next = head;
	head = m;
	M_SANITY(m, 0);

	return (0);
}

static __inline int
m_vectorize(struct mbuf *m, int max, struct mbuf **vec, int *count)
{
	int i, error = 0;

	for (i = 0; i < max; i++) {
		if (m == NULL)
			break;
#ifndef MBUF_PACKET_ZONE_DISABLE
		if ((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_PACKET))
			return (EINVAL);
#endif
#ifdef ZERO_COPY_SOCKETS
		if ((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_SFBUF))
			return (EINVAL);
#endif
		M_SANITY(m, 0);
		vec[i] = m;
		m = m->m_next;
	}
	if (m)
		error = EFBIG;

	*count = i;

	return (error);
}
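
/*
 * Select up to osize header mbufs to carry the collapsed chain (one
 * header is needed for every MAX_MBUF_IOV iovec entries).  An mbuf
 * whose data lives in external storage has its internal data area
 * free to hold a struct mbuf_vec, so such mbufs are reused as
 * headers; their cluster references are saved in ovec so that
 * m_setiovec() can still record them.  If too few exist, fresh mbufs
 * are allocated.
 */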
static __inline int
m_findmbufs(struct mbuf **ivec, int maxbufs, struct mbuf_ext *ovec, int osize, int *ocount)
{
	int i, nhbufsneed, nhbufs;
	struct mbuf *m;

	nhbufsneed = min(((maxbufs - 1)/MAX_MBUF_IOV) + 1, osize);
	ovec[0].me_m = NULL;

	for (nhbufs = i = 0; i < maxbufs && nhbufs < nhbufsneed; i++) {
		if ((ivec[i]->m_flags & M_EXT) == 0)
			continue;
		m = ivec[i];
		ovec[nhbufs].me_m = m;
		ovec[nhbufs].me_base = m->m_ext.ext_buf;
		ovec[nhbufs].me_refcnt = m->m_ext.ref_cnt;
		ovec[nhbufs].me_offset = (m->m_data - m->m_ext.ext_buf);
		ovec[nhbufs].me_flags = m->m_ext.ext_type;
		nhbufs++;
	}
	if (nhbufs == 0) {
		if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
			goto m_getfail;
		ovec[nhbufs].me_m = m;
		nhbufs = 1;
	}
	while (nhbufs < nhbufsneed) {
		if ((m = m_get(M_NOWAIT, MT_DATA)) == NULL)
			goto m_getfail;
		ovec[nhbufs].me_m = m;
		nhbufs++;
	}
	/*
	 * Copy over packet header to new head of chain
	 */
	if (ovec[0].me_m != ivec[0]) {
		ovec[0].me_m->m_flags |= M_PKTHDR;
		memcpy(&ovec[0].me_m->m_pkthdr, &ivec[0]->m_pkthdr, sizeof(struct pkthdr));
		SLIST_INIT(&ivec[0]->m_pkthdr.tags);
	}
	*ocount = nhbufs;
	return (0);
m_getfail:
	for (i = 0; i < nhbufs; i++)
		if ((ovec[i].me_m->m_flags & M_EXT) == 0)
			uma_zfree(zone_mbuf, ovec[i].me_m);
	return (ENOMEM);
}

static __inline void
m_setiovec(struct mbuf_iovec *mi, struct mbuf *m, struct mbuf_ext *extvec, int *me_index,
    int max_me_index)
{
	int idx = *me_index;

	mi->mi_len = m->m_len;
	if (idx < max_me_index && extvec[idx].me_m == m) {
		struct mbuf_ext *me = &extvec[idx];

		(*me_index)++;
		mi->mi_base = me->me_base;
		mi->mi_refcnt = me->me_refcnt;
		mi->mi_offset = me->me_offset;
		mi->mi_flags = me->me_flags;
	} else if (m->m_flags & M_EXT) {
		mi->mi_base = m->m_ext.ext_buf;
		mi->mi_refcnt = m->m_ext.ref_cnt;
		mi->mi_offset = (m->m_data - m->m_ext.ext_buf);
		mi->mi_flags = m->m_ext.ext_type;
	} else {
		KASSERT(m->m_len < 256, ("mbuf too large len=%d",
		    m->m_len));
		mi->mi_base = (uint8_t *)m;
		mi->mi_refcnt = NULL;
		mi->mi_offset = (m->m_data - (caddr_t)m);
		mi->mi_flags = EXT_MBUF;
	}
	DPRINTF("type=%d len=%d refcnt=%p cl=%p offset=0x%x\n",
	    mi->mi_flags, mi->mi_len, mi->mi_refcnt, mi->mi_base,
	    mi->mi_offset);
}

int
_m_collapse(struct mbuf *m, int maxbufs, struct mbuf **mnew)
{
	struct mbuf *m0, *lmvec[MAX_BUFS];
	struct mbuf **mnext;
	struct mbuf **vec = lmvec;
	struct mbuf *mhead = NULL;
	struct mbuf_vec *mv;
	int err, i, j, max, len, nhbufs;
	struct mbuf_ext dvec[MAX_HVEC];
	int hidx = 0, dvecidx;

	M_SANITY(m, 0);
	if (maxbufs > MAX_BUFS) {
		if ((vec = malloc(maxbufs * sizeof(struct mbuf *),
		    M_DEVBUF, M_NOWAIT)) == NULL)
			return (ENOMEM);
	}

	if ((err = m_vectorize(m, maxbufs, vec, &max)) != 0)
		goto out;
	if ((err = m_findmbufs(vec, max, dvec, MAX_HVEC, &nhbufs)) != 0)
		goto out;

	KASSERT(max > 0, ("invalid mbuf count"));
	KASSERT(nhbufs > 0, ("invalid header mbuf count"));

	mhead = m0 = dvec[0].me_m;

	DPRINTF("nbufs=%d nhbufs=%d\n", max, nhbufs);
	for (hidx = dvecidx = i = 0, mnext = NULL; i < max; hidx++) {
		m0 = dvec[hidx].me_m;
		m0->m_flags &= ~M_EXT;
		m0->m_flags |= M_IOVEC;

		if (mnext)
			*mnext = m0;

		mv = mtomv(m0);
		len = mv->mv_first = 0;
		for (j = 0; j < MAX_MBUF_IOV && i < max; j++, i++) {
			struct mbuf_iovec *mi = &mv->mv_vec[j];

			m_setiovec(mi, vec[i], dvec, &dvecidx, nhbufs);
			len += mi->mi_len;
		}
		m0->m_data = mv->mv_vec[0].mi_base + mv->mv_vec[0].mi_offset;
		mv->mv_count = j;
		m0->m_len = len;
		mnext = &m0->m_next;
		DPRINTF("count=%d len=%d\n", j, len);
	}

	/*
	 * Terminate chain
	 */
	m0->m_next = NULL;

	/*
	 * Free all mbufs not used by the mbuf iovec chain
	 */
	for (i = 0; i < max; i++)
		if (vec[i]->m_flags & M_EXT) {
			vec[i]->m_flags &= ~M_EXT;
			collapse_free++;
			uma_zfree(zone_mbuf, vec[i]);
		}

	*mnew = mhead;
out:
	if (vec != lmvec)
		free(vec, M_DEVBUF);
	return (err);
}
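
/*
 * Free an M_IOVEC mbuf and the storage it references: each entry
 * drops one reference, and the underlying cluster, mbuf, or sf_buf is
 * released only when the last reference goes away.
 */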
void
mb_free_vec(struct mbuf *m)
{
	struct mbuf_vec *mv;
	int i;

	KASSERT((m->m_flags & (M_EXT|M_IOVEC)) == M_IOVEC,
	    ("%s: M_EXT set", __func__));

	mv = mtomv(m);
	KASSERT(mv->mv_count <= MAX_MBUF_IOV,
	    ("%s: mi_count too large %d", __func__, mv->mv_count));

	DPRINTF("count=%d len=%d\n", mv->mv_count, m->m_len);
	for (i = mv->mv_first; i < mv->mv_count; i++) {
		uma_zone_t zone = NULL;
		volatile int *refcnt = mv->mv_vec[i].mi_refcnt;
		int type = mbuf_vec_get_type(mv, i);
		void *cl = mv->mv_vec[i].mi_base;

		if (refcnt && *refcnt != 1 && atomic_fetchadd_int(refcnt, -1) != 1)
			continue;

		DPRINTF("freeing idx=%d refcnt=%p type=%d cl=%p\n", i, refcnt, type, cl);
		switch (type) {
		case EXT_MBUF:
			mb_free_vec_free++;
			/* FALLTHROUGH */
		case EXT_CLUSTER:
		case EXT_JUMBOP:
		case EXT_JUMBO9:
		case EXT_JUMBO16:
			zone = m_getzonefromtype(type);
			uma_zfree(zone, cl);
			continue;
		case EXT_SFBUF:
			*refcnt = 0;
			uma_zfree(zone_ext_refcnt, __DEVOLATILE(u_int *,
			    refcnt));
#ifdef __i386__
			sf_buf_mext(cl, mv->mv_vec[i].mi_args);
#else
			/*
			 * Every architecture other than i386 uses a vm_page
			 * for an sf_buf (well ... sparc64 does but shouldn't)
			 */
			sf_buf_mext(cl, PHYS_TO_VM_PAGE(vtophys(cl)));
#endif
			continue;
		default:
			KASSERT(m->m_ext.ext_type == 0,
			    ("%s: unknown ext_type", __func__));
			break;
		}
	}
	/*
	 * Free this mbuf back to the mbuf zone with all iovec
	 * information purged.
	 */
	mb_free_vec_free++;
	uma_zfree(zone_mbuf, m);
}

#if (!defined(__sparc64__) && !defined(__sun4v__))
#include <sys/sysctl.h>

#define BUS_DMA_COULD_BOUNCE	BUS_DMA_BUS3
#define BUS_DMA_MIN_ALLOC_COMP	BUS_DMA_BUS4

/*
 * The structures below appear to mirror the private definitions in
 * the machine-dependent busdma code so that the segment-loading loop
 * can be reimplemented here; they must be kept in sync with that code.
 */
struct bounce_zone {
	STAILQ_ENTRY(bounce_zone) links;
	STAILQ_HEAD(bp_list, bounce_page) bounce_page_list;
	int		total_bpages;
	int		free_bpages;
	int		reserved_bpages;
	int		active_bpages;
	int		total_bounced;
	int		total_deferred;
	bus_size_t	alignment;
	bus_size_t	boundary;
	bus_addr_t	lowaddr;
	char		zoneid[8];
	char		lowaddrid[20];
	struct sysctl_ctx_list sysctl_tree;
	struct sysctl_oid *sysctl_tree_top;
};

struct bus_dma_tag {
	bus_dma_tag_t	  parent;
	bus_size_t	  alignment;
	bus_size_t	  boundary;
	bus_addr_t	  lowaddr;
	bus_addr_t	  highaddr;
	bus_dma_filter_t *filter;
	void		 *filterarg;
	bus_size_t	  maxsize;
	u_int		  nsegments;
	bus_size_t	  maxsegsz;
	int		  flags;
	int		  ref_count;
	int		  map_count;
	bus_dma_lock_t	 *lockfunc;
	void		 *lockfuncarg;
	bus_dma_segment_t *segments;
	struct bounce_zone *bounce_zone;
};

struct bus_dmamap {
	struct bp_list	       bpages;
	int		       pagesneeded;
	int		       pagesreserved;
	bus_dma_tag_t	       dmat;
	void		      *buf;	/* unmapped buffer pointer */
	bus_size_t	       buflen;	/* unmapped buffer length */
	bus_dmamap_callback_t *callback;
	void		      *callback_arg;
	STAILQ_ENTRY(bus_dmamap) links;
};

static struct bus_dmamap nobounce_dmamap;

static __inline int
run_filter(bus_dma_tag_t dmat, bus_addr_t paddr)
{
	int retval;

	retval = 0;

	do {
		if (((paddr > dmat->lowaddr && paddr <= dmat->highaddr)
		    || ((paddr & (dmat->alignment - 1)) != 0))
		    && (dmat->filter == NULL
		    || (*dmat->filter)(dmat->filterarg, paddr) != 0))
			retval = 1;

		dmat = dmat->parent;
	} while (retval == 0 && dmat != NULL);
	return (retval);
}
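
/*
 * Local copy of the busdma segment-building loop.  Unlike the
 * machine-dependent _bus_dmamap_load_buffer() this version has no
 * bounce-page support and panics if a transfer would require it, so
 * it may only be used with tags whose constraints the hardware can
 * always satisfy.
 */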
static __inline int
_bus_dmamap_load_buffer(bus_dma_tag_t dmat,
			bus_dmamap_t map,
			void *buf, bus_size_t buflen,
			pmap_t pmap,
			int flags,
			bus_addr_t *lastaddrp,
			bus_dma_segment_t *segs,
			int *segp,
			int first)
{
	bus_size_t sgsize;
	bus_addr_t curaddr, lastaddr, baddr, bmask;
	vm_offset_t vaddr;
	int needbounce = 0;
	int seg;

	if (map == NULL)
		map = &nobounce_dmamap;

	/* Reserve Necessary Bounce Pages */
	if (map->pagesneeded != 0)
		panic("don't support bounce pages");

	vaddr = (vm_offset_t)buf;
	lastaddr = *lastaddrp;
	bmask = ~(dmat->boundary - 1);

	for (seg = *segp; buflen > 0 ; ) {
		/*
		 * Get the physical address for this segment.
		 */
		if (pmap)
			curaddr = pmap_extract(pmap, vaddr);
		else
			curaddr = pmap_kextract(vaddr);

		/*
		 * Compute the segment size, and adjust counts.
		 */
		sgsize = PAGE_SIZE - ((u_long)curaddr & PAGE_MASK);
		if (buflen < sgsize)
			sgsize = buflen;

		/*
		 * Make sure we don't cross any boundaries.
		 */
		if (dmat->boundary > 0) {
			baddr = (curaddr + dmat->boundary) & bmask;
			if (sgsize > (baddr - curaddr))
				sgsize = (baddr - curaddr);
		}

		if (map->pagesneeded != 0 && run_filter(dmat, curaddr))
			panic("no bounce page support");

		/*
		 * Insert chunk into a segment, coalescing with
		 * previous segment if possible.
		 */
		if (first) {
			segs[seg].ds_addr = curaddr;
			segs[seg].ds_len = sgsize;
			first = 0;
		} else {
			if (needbounce == 0 && curaddr == lastaddr &&
			    (segs[seg].ds_len + sgsize) <= dmat->maxsegsz &&
			    (dmat->boundary == 0 ||
			    (segs[seg].ds_addr & bmask) == (curaddr & bmask)))
				segs[seg].ds_len += sgsize;
			else {
				if (++seg >= dmat->nsegments)
					break;
				segs[seg].ds_addr = curaddr;
				segs[seg].ds_len = sgsize;
			}
		}

		lastaddr = curaddr + sgsize;
		vaddr += sgsize;
		buflen -= sgsize;
	}

	*segp = seg;
	*lastaddrp = lastaddr;

	/*
	 * Did we fit?
	 */
	return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */
}

int
bus_dmamap_load_mvec_sg(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m0,
    bus_dma_segment_t *segs, int *nsegs, int flags)
{
	int error, i;

	M_ASSERTPKTHDR(m0);

	if ((m0->m_flags & M_IOVEC) == 0)
		return (bus_dmamap_load_mbuf_sg(dmat, map, m0, segs, nsegs, flags));

	flags |= BUS_DMA_NOWAIT;
	*nsegs = 0;
	error = 0;
	if (m0->m_pkthdr.len <= dmat->maxsize) {
		int first = 1;
		bus_addr_t lastaddr = 0;
		struct mbuf *m;

		for (m = m0; m != NULL && error == 0; m = m->m_next) {
			struct mbuf_vec *mv;
			int count, firstcl;

			if (m->m_len == 0)
				continue;

			mv = mtomv(m);
			count = mv->mv_count;
			firstcl = mv->mv_first;
			KASSERT(count <= MAX_MBUF_IOV, ("count=%d too large", count));
			for (i = firstcl; i < count && error == 0; i++) {
				void *data = mv->mv_vec[i].mi_base + mv->mv_vec[i].mi_offset;
				int len = mv->mv_vec[i].mi_len;

				if (len == 0)
					continue;
				DPRINTF("mapping data=%p len=%d\n", data, len);
				error = _bus_dmamap_load_buffer(dmat, NULL,
				    data, len, NULL, flags, &lastaddr,
				    segs, nsegs, first);
				DPRINTF("%d: addr=0x%jx len=%ju\n", i,
				    (uintmax_t)segs[*nsegs].ds_addr,
				    (uintmax_t)segs[*nsegs].ds_len);
				first = 0;
			}
		}
	} else {
		error = EINVAL;
	}

	(*nsegs)++;

	CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d",
	    __func__, dmat, dmat->flags, error, *nsegs);
	return (error);
}
#endif /* !__sparc64__ && !__sun4v__ */
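
/*
 * Sketch of the intended call sequence from a driver transmit path;
 * the tag, map, and segment-count names here are hypothetical, not
 * part of this file:
 *
 *	bus_dma_segment_t segs[TX_MAX_SEGS];
 *	struct mbuf *n;
 *	int nsegs, err;
 *
 *	err = _m_collapse(m, TX_MAX_SEGS, &n);
 *	if (err == 0)
 *		err = bus_dmamap_load_mvec_sg(txq_dmat, NULL, n,
 *		    segs, &nsegs, 0);
 */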