1/* Modified by Broadcom Corp. Portions Copyright (c) Broadcom Corp, 2012. */ 2/* 3 * "splice": joining two ropes together by interweaving their strands. 4 * 5 * This is the "extended pipe" functionality, where a pipe is used as 6 * an arbitrary in-memory buffer. Think of a pipe as a small kernel 7 * buffer that you can use to transfer data from one end to the other. 8 * 9 * The traditional unix read/write is extended with a "splice()" operation 10 * that transfers data buffers to or from a pipe buffer. 11 * 12 * Named by Larry McVoy, original implementation from Linus, extended by 13 * Jens to support splicing to files, network, direct splicing, etc and 14 * fixing lots of bugs. 15 * 16 * Copyright (C) 2005-2006 Jens Axboe <axboe@kernel.dk> 17 * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org> 18 * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu> 19 * 20 */ 21#include <linux/fs.h> 22#include <linux/file.h> 23#include <linux/pagemap.h> 24#include <linux/splice.h> 25#include <linux/memcontrol.h> 26#include <linux/mm_inline.h> 27#include <linux/swap.h> 28#include <linux/writeback.h> 29#include <linux/buffer_head.h> 30#include <linux/module.h> 31#include <linux/syscalls.h> 32#include <linux/uio.h> 33#include <linux/security.h> 34#include <linux/gfp.h> 35 36#include <typedefs.h> 37#include <bcmdefs.h> 38#if defined(CONFIG_BCM_RECVFILE) 39#include <net/sock.h> 40#endif /* CONFIG_BCM_RECVFILE */ 41 42/* 43 * Attempt to steal a page from a pipe buffer. This should perhaps go into 44 * a vm helper function, it's already simplified quite a bit by the 45 * addition of remove_mapping(). If success is returned, the caller may 46 * attempt to reuse this page for another destination. 
 */
static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
				     struct pipe_buffer *buf)
{
	struct page *page = buf->page;
	struct address_space *mapping;

	lock_page(page);

	mapping = page_mapping(page);
	if (mapping) {
		WARN_ON(!PageUptodate(page));

		/*
		 * At least for ext2 with nobh option, we need to wait on
		 * writeback completing on this page, since we'll remove it
		 * from the pagecache. Otherwise truncate wont wait on the
		 * page, allowing the disk blocks to be reused by someone else
		 * before we actually wrote our data to them. fs corruption
		 * ensues.
		 */
		wait_on_page_writeback(page);

		if (page_has_private(page) &&
		    !try_to_release_page(page, GFP_KERNEL))
			goto out_unlock;

		/*
		 * If we succeeded in removing the mapping, set LRU flag
		 * and return good.
		 */
		if (remove_mapping(mapping, page)) {
			buf->flags |= PIPE_BUF_FLAG_LRU;
			return 0;
		}
	}

	/*
	 * Raced with truncate or failed to remove page from current
	 * address space, unlock and return failure.
	 */
out_unlock:
	unlock_page(page);
	return 1;
}

/* Drop the pipe's reference to the page-cache page held by @buf. */
static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
					struct pipe_buffer *buf)
{
	page_cache_release(buf->page);
	buf->flags &= ~PIPE_BUF_FLAG_LRU;
}

/*
 * Check whether the contents of buf is OK to access. Since the content
 * is a page cache page, IO may be in flight.
 */
static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe,
				       struct pipe_buffer *buf)
{
	struct page *page = buf->page;
	int err;

	if (!PageUptodate(page)) {
		lock_page(page);

		/*
		 * Page got truncated/unhashed. This will cause a 0-byte
		 * splice, if this is the first page.
		 */
		if (!page->mapping) {
			err = -ENODATA;
			goto error;
		}

		/*
		 * Uh oh, read-error from disk.
		 */
		if (!PageUptodate(page)) {
			err = -EIO;
			goto error;
		}

		/*
		 * Page is ok afterall, we are done.
		 */
		unlock_page(page);
	}

	return 0;
error:
	unlock_page(page);
	return err;
}

/* Buffer ops for pipe buffers that reference page-cache pages. */
static const struct pipe_buf_operations page_cache_pipe_buf_ops = {
	.can_merge = 0,
	.map = generic_pipe_buf_map,
	.unmap = generic_pipe_buf_unmap,
	.confirm = page_cache_pipe_buf_confirm,
	.release = page_cache_pipe_buf_release,
	.steal = page_cache_pipe_buf_steal,
	.get = generic_pipe_buf_get,
};

/*
 * Only allow stealing a user page if it was gifted to the pipe
 * (SPLICE_F_GIFT); otherwise the page still belongs to user space.
 */
static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
				    struct pipe_buffer *buf)
{
	if (!(buf->flags & PIPE_BUF_FLAG_GIFT))
		return 1;

	buf->flags |= PIPE_BUF_FLAG_LRU;
	return generic_pipe_buf_steal(pipe, buf);
}

/* Buffer ops for pipe buffers that reference user-space pages (vmsplice). */
static const struct pipe_buf_operations user_page_pipe_buf_ops = {
	.can_merge = 0,
	.map = generic_pipe_buf_map,
	.unmap = generic_pipe_buf_unmap,
	.confirm = generic_pipe_buf_confirm,
	.release = page_cache_pipe_buf_release,
	.steal = user_page_pipe_buf_steal,
	.get = generic_pipe_buf_get,
};

/**
 * splice_to_pipe - fill passed data into a pipe
 * @pipe:	pipe to fill
 * @spd:	data to fill
 *
 * Description:
 *    @spd contains a map of pages and len/offset tuples, along with
 *    the struct pipe_buf_operations associated with these pages. This
 *    function will link that data to the pipe.
181 * 182 */ 183ssize_t splice_to_pipe(struct pipe_inode_info *pipe, 184 struct splice_pipe_desc *spd) 185{ 186 unsigned int spd_pages = spd->nr_pages; 187 int ret, do_wakeup, page_nr; 188 189 ret = 0; 190 do_wakeup = 0; 191 page_nr = 0; 192 193 pipe_lock(pipe); 194 195 for (;;) { 196 if (!pipe->readers) { 197 send_sig(SIGPIPE, current, 0); 198 if (!ret) 199 ret = -EPIPE; 200 break; 201 } 202 203 if (pipe->nrbufs < pipe->buffers) { 204 int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); 205 struct pipe_buffer *buf = pipe->bufs + newbuf; 206 207 buf->page = spd->pages[page_nr]; 208 buf->offset = spd->partial[page_nr].offset; 209 buf->len = spd->partial[page_nr].len; 210 buf->private = spd->partial[page_nr].private; 211 buf->ops = spd->ops; 212 if (spd->flags & SPLICE_F_GIFT) 213 buf->flags |= PIPE_BUF_FLAG_GIFT; 214 215 pipe->nrbufs++; 216 page_nr++; 217 ret += buf->len; 218 219 if (pipe->inode) 220 do_wakeup = 1; 221 222 if (!--spd->nr_pages) 223 break; 224 if (pipe->nrbufs < pipe->buffers) 225 continue; 226 227 break; 228 } 229 230 if (spd->flags & SPLICE_F_NONBLOCK) { 231 if (!ret) 232 ret = -EAGAIN; 233 break; 234 } 235 236 if (signal_pending(current)) { 237 if (!ret) 238 ret = -ERESTARTSYS; 239 break; 240 } 241 242 if (do_wakeup) { 243 smp_mb(); 244 if (waitqueue_active(&pipe->wait)) 245 wake_up_interruptible_sync(&pipe->wait); 246 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 247 do_wakeup = 0; 248 } 249 250 pipe->waiting_writers++; 251 pipe_wait(pipe); 252 pipe->waiting_writers--; 253 } 254 255 pipe_unlock(pipe); 256 257 if (do_wakeup) { 258 smp_mb(); 259 if (waitqueue_active(&pipe->wait)) 260 wake_up_interruptible(&pipe->wait); 261 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 262 } 263 264 while (page_nr < spd_pages) 265 spd->spd_release(spd, page_nr++); 266 267 return ret; 268} 269 270static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) 271{ 272 page_cache_release(spd->pages[i]); 273} 274 275/* 276 * Check if 
 we need to grow the arrays holding pages and partial page
 * descriptions. Returns 0 on success (or if the stack arrays suffice),
 * -ENOMEM if either allocation fails.
 */
int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
{
	if (pipe->buffers <= PIPE_DEF_BUFFERS)
		return 0;

	spd->pages = kmalloc(pipe->buffers * sizeof(struct page *), GFP_KERNEL);
	spd->partial = kmalloc(pipe->buffers * sizeof(struct partial_page), GFP_KERNEL);

	if (spd->pages && spd->partial)
		return 0;

	kfree(spd->pages);
	kfree(spd->partial);
	return -ENOMEM;
}

/* Free the arrays allocated by splice_grow_spd(), if any were needed. */
void splice_shrink_spd(struct pipe_inode_info *pipe,
		       struct splice_pipe_desc *spd)
{
	if (pipe->buffers <= PIPE_DEF_BUFFERS)
		return;

	kfree(spd->pages);
	kfree(spd->partial);
}

/*
 * Core of generic_file_splice_read(): look up (or read in) the page-cache
 * pages covering [*ppos, *ppos + len) and link them into @pipe.
 */
static int BCMFASTPATH_HOST
__generic_file_splice_read(struct file *in, loff_t *ppos,
			   struct pipe_inode_info *pipe, size_t len,
			   unsigned int flags)
{
	struct address_space *mapping = in->f_mapping;
	unsigned int loff, nr_pages, req_pages;
	struct page *pages[PIPE_DEF_BUFFERS];
	struct partial_page partial[PIPE_DEF_BUFFERS];
	struct page *page;
	pgoff_t index, end_index;
	loff_t isize;
	int error, page_nr;
	struct splice_pipe_desc spd = {
		.pages = pages,
		.partial = partial,
		.flags = flags,
		.ops = &page_cache_pipe_buf_ops,
		.spd_release = spd_release_page,
	};

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

	index = *ppos >> PAGE_CACHE_SHIFT;
	loff = *ppos & ~PAGE_CACHE_MASK;
	req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	nr_pages = min(req_pages, pipe->buffers);

	/*
	 * Lookup the (hopefully) full range of pages we need.
	 */
	spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, spd.pages);
	index += spd.nr_pages;

	/*
	 * If find_get_pages_contig() returned fewer pages than we needed,
	 * readahead/allocate the rest and fill in the holes.
	 */
	if (spd.nr_pages < nr_pages)
		page_cache_sync_readahead(mapping, &in->f_ra, in,
				index, req_pages - spd.nr_pages);

	error = 0;
	while (spd.nr_pages < nr_pages) {
		/*
		 * Page could be there, find_get_pages_contig() breaks on
		 * the first hole.
		 */
		page = find_get_page(mapping, index);
		if (!page) {
			/*
			 * page didn't exist, allocate one.
			 */
			page = page_cache_alloc_cold(mapping);
			if (!page)
				break;

			error = add_to_page_cache_lru(page, mapping, index,
						GFP_KERNEL);
			if (unlikely(error)) {
				page_cache_release(page);
				/* -EEXIST: someone else added it; retry. */
				if (error == -EEXIST)
					continue;
				break;
			}
			/*
			 * add_to_page_cache() locks the page, unlock it
			 * to avoid convoluting the logic below even more.
			 */
			unlock_page(page);
		}

		spd.pages[spd.nr_pages++] = page;
		index++;
	}

	/*
	 * Now loop over the map and see if we need to start IO on any
	 * pages, fill in the partial map, etc.
	 */
	index = *ppos >> PAGE_CACHE_SHIFT;
	nr_pages = spd.nr_pages;
	spd.nr_pages = 0;
	for (page_nr = 0; page_nr < nr_pages; page_nr++) {
		unsigned int this_len;

		if (!len)
			break;

		/*
		 * this_len is the max we'll use from this page
		 */
		this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
		page = spd.pages[page_nr];

		if (PageReadahead(page))
			page_cache_async_readahead(mapping, &in->f_ra, in,
					page, index, req_pages - page_nr);

		/*
		 * If the page isn't uptodate, we may need to start io on it
		 */
		if (!PageUptodate(page)) {
			lock_page(page);

			/*
			 * Page was truncated, or invalidated by the
			 * filesystem. Redo the find/create, but this time the
			 * page is kept locked, so there's no chance of another
			 * race with truncate/invalidate.
			 */
			if (!page->mapping) {
				unlock_page(page);
				page = find_or_create_page(mapping, index,
						mapping_gfp_mask(mapping));

				if (!page) {
					error = -ENOMEM;
					break;
				}
				page_cache_release(spd.pages[page_nr]);
				spd.pages[page_nr] = page;
			}
			/*
			 * page was already under io and is now done, great
			 */
			if (PageUptodate(page)) {
				unlock_page(page);
				goto fill_it;
			}

			/*
			 * need to read in the page
			 */
			error = mapping->a_ops->readpage(in, page);
			if (unlikely(error)) {
				/*
				 * We really should re-lookup the page here,
				 * but it complicates things a lot. Instead
				 * lets just do what we already stored, and
				 * we'll get it the next time we are called.
				 */
				if (error == AOP_TRUNCATED_PAGE)
					error = 0;

				break;
			}
		}
fill_it:
		/*
		 * i_size must be checked after PageUptodate.
		 */
		isize = i_size_read(mapping->host);
		end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
		if (unlikely(!isize || index > end_index))
			break;

		/*
		 * if this is the last page, see if we need to shrink
		 * the length and stop
		 */
		if (end_index == index) {
			unsigned int plen;

			/*
			 * max good bytes in this page
			 */
			plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
			if (plen <= loff)
				break;

			/*
			 * force quit after adding this page
			 */
			this_len = min(this_len, plen - loff);
			len = this_len;
		}

		spd.partial[page_nr].offset = loff;
		spd.partial[page_nr].len = this_len;
		len -= this_len;
		loff = 0;
		spd.nr_pages++;
		index++;
	}

	/*
	 * Release any pages at the end, if we quit early. 'page_nr' is how far
	 * we got, 'nr_pages' is how many pages are in the map.
	 */
	while (page_nr < nr_pages)
		page_cache_release(spd.pages[page_nr++]);
	in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;

	if (spd.nr_pages)
		error = splice_to_pipe(pipe, &spd);

	splice_shrink_spd(pipe, &spd);
	return error;
}

/**
 * generic_file_splice_read - splice data from file to a pipe
 * @in:		file to splice from
 * @ppos:	position in @in
 * @pipe:	pipe to splice to
 * @len:	number of bytes to splice
 * @flags:	splice modifier flags
 *
 * Description:
 *    Will read pages from given file and fill them into a pipe. Can be
 *    used as long as the address_space operations for the source implements
 *    a readpage() hook.
 *
 */
ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
				 struct pipe_inode_info *pipe, size_t len,
				 unsigned int flags)
{
	loff_t isize, left;
	int ret;

	/* Clamp the request to the current end of file. */
	isize = i_size_read(in->f_mapping->host);
	if (unlikely(*ppos >= isize))
		return 0;

	left = isize - *ppos;
	if (unlikely(left < len))
		len = left;

	ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
	if (ret > 0) {
		*ppos += ret;
		file_accessed(in);
	}

	return ret;
}
EXPORT_SYMBOL(generic_file_splice_read);

/* Buffer ops for pipe buffers backed by plain allocated pages. */
static const struct pipe_buf_operations default_pipe_buf_ops = {
	.can_merge = 0,
	.map = generic_pipe_buf_map,
	.unmap = generic_pipe_buf_unmap,
	.confirm = generic_pipe_buf_confirm,
	.release = generic_pipe_buf_release,
	.steal = generic_pipe_buf_steal,
	.get = generic_pipe_buf_get,
};

/*
 * vfs_readv() into a kernel iovec at @offset, temporarily lifting the
 * user address limit so the kernel pointers are accepted.
 */
static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
			    unsigned long vlen, loff_t offset)
{
	mm_segment_t old_fs;
	loff_t pos = offset;
	ssize_t res;

	old_fs = get_fs();
	set_fs(get_ds());
	/* The cast to a user pointer is valid due to the set_fs() */
	res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos);
	set_fs(old_fs);

	return res;
}

/*
 * vfs_write() from a kernel buffer at @pos, temporarily lifting the
 * user address limit so the kernel pointer is accepted.
 */
static ssize_t kernel_write(struct file *file, const char *buf, size_t count,
			    loff_t pos)
{
	mm_segment_t old_fs;
	ssize_t res;

	old_fs = get_fs();
	set_fs(get_ds());
	/* The cast to a user pointer is valid due to the set_fs() */
	res = vfs_write(file, (const char __user *)buf, count, &pos);
	set_fs(old_fs);

	return res;
}

/*
 * Fallback ->splice_read: read the file into freshly allocated pages via
 * kernel_readv() and hand those pages to the pipe.
 */
ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
				 struct pipe_inode_info *pipe, size_t len,
				 unsigned int flags)
{
	unsigned int nr_pages;
	unsigned int nr_freed;
	size_t offset;
	struct page *pages[PIPE_DEF_BUFFERS];
	struct partial_page partial[PIPE_DEF_BUFFERS];
	struct iovec *vec, __vec[PIPE_DEF_BUFFERS];
	ssize_t res;
	size_t this_len;
	int error;
	int i;
	struct splice_pipe_desc spd = {
		.pages = pages,
		.partial = partial,
		.flags = flags,
		.ops = &default_pipe_buf_ops,
		.spd_release = spd_release_page,
	};

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

	res = -ENOMEM;
	vec = __vec;
	if (pipe->buffers > PIPE_DEF_BUFFERS) {
		vec = kmalloc(pipe->buffers * sizeof(struct iovec), GFP_KERNEL);
		if (!vec)
			goto shrink_ret;
	}

	offset = *ppos & ~PAGE_CACHE_MASK;
	nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;

	/* Allocate one page per iovec slot, up to the pipe's capacity. */
	for (i = 0; i < nr_pages && i < pipe->buffers && len; i++) {
		struct page *page;

		page = alloc_page(GFP_USER);
		error = -ENOMEM;
		if (!page)
			goto err;

		this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset);
		vec[i].iov_base = (void __user *) page_address(page);
		vec[i].iov_len = this_len;
		spd.pages[i] = page;
		spd.nr_pages++;
		len -= this_len;
		offset = 0;
	}

	res = kernel_readv(in, vec, spd.nr_pages, *ppos);
	if (res < 0) {
		error = res;
		goto err;
	}

	error = 0;
	if (!res)
		goto err;

	/* Trim the partial map to what was actually read; free unused pages. */
	nr_freed = 0;
	for (i = 0; i < spd.nr_pages; i++) {
		this_len = min_t(size_t, vec[i].iov_len, res);
		spd.partial[i].offset = 0;
		spd.partial[i].len = this_len;
		if (!this_len) {
			__free_page(spd.pages[i]);
			spd.pages[i] = NULL;
			nr_freed++;
		}
		res -= this_len;
	}
	spd.nr_pages -= nr_freed;

	res = splice_to_pipe(pipe, &spd);
	if (res > 0)
		*ppos += res;

shrink_ret:
	if (vec != __vec)
		kfree(vec);
	splice_shrink_spd(pipe, &spd);
	return res;

err:
	for (i = 0; i < spd.nr_pages; i++)
		__free_page(spd.pages[i]);

	res = error;
	goto shrink_ret;
}
EXPORT_SYMBOL(default_file_splice_read);

/*
 * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
 * using sendpage(). Return the number of bytes sent.
 */
static int pipe_to_sendpage(struct pipe_inode_info *pipe,
			    struct pipe_buffer *buf, struct splice_desc *sd)
{
	struct file *file = sd->u.file;
	loff_t pos = sd->pos;
	int ret, more;

	ret = buf->ops->confirm(pipe, buf);
	if (!ret) {
		more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
		if (file->f_op && file->f_op->sendpage)
			ret = file->f_op->sendpage(file, buf->page, buf->offset,
						   sd->len, &pos, more);
		else
			ret = -EINVAL;
	}

	return ret;
}

/*
 * This is a little more tricky than the file -> pipe splicing. There are
 * basically three cases:
 *
 *	- Destination page already exists in the address space and there
 *	  are users of it. For that case we have no other option that
 *	  copying the data. Tough luck.
 *	- Destination page already exists in the address space, but there
 *	  are no users of it. Make sure it's uptodate, then drop it. Fall
 *	  through to last case.
 *	- Destination page does not exist, we can add the pipe page to
 *	  the page cache and avoid the copy.
 *
 * If asked to move pages to the output file (SPLICE_F_MOVE is set in
 * sd->flags), we attempt to migrate pages from the pipe to the output
 * file address space page cache. This is possible if no one else has
 * the pipe page referenced outside of the pipe and page cache. If
 * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
 * a new page in the output file page cache and fill/dirty that.
 */
int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
		 struct splice_desc *sd)
{
	struct file *file = sd->u.file;
	struct address_space *mapping = file->f_mapping;
	unsigned int offset, this_len;
	struct page *page;
	void *fsdata;
	int ret;

	/*
	 * make sure the data in this buffer is uptodate
	 */
	ret = buf->ops->confirm(pipe, buf);
	if (unlikely(ret))
		return ret;

	offset = sd->pos & ~PAGE_CACHE_MASK;

	/* Clamp the copy so it never crosses a page boundary. */
	this_len = sd->len;
	if (this_len + offset > PAGE_CACHE_SIZE)
		this_len = PAGE_CACHE_SIZE - offset;

	ret = pagecache_write_begin(file, mapping, sd->pos, this_len,
				AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
	if (unlikely(ret))
		goto out;

	if (buf->page != page) {
		/*
		 * Careful, ->map() uses KM_USER0!
		 */
		char *src = buf->ops->map(pipe, buf, 1);
		char *dst = kmap_atomic(page, KM_USER1);

		memcpy(dst + offset, src + buf->offset, this_len);
		flush_dcache_page(page);
		kunmap_atomic(dst, KM_USER1);
		buf->ops->unmap(pipe, buf, src);
	}
	ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
				page, fsdata);
out:
	return ret;
}
EXPORT_SYMBOL(pipe_to_file);

/* Wake any writers sleeping on the pipe and signal async waiters. */
static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
{
	smp_mb();
	if (waitqueue_active(&pipe->wait))
		wake_up_interruptible(&pipe->wait);
	kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}

/**
 * splice_from_pipe_feed - feed available data from a pipe to a file
 * @pipe:	pipe to splice from
 * @sd:		information to @actor
 * @actor:	handler that splices the data
 *
 * Description:
 *    This function loops over the pipe and calls @actor to do the
 *    actual moving of a single struct pipe_buffer to the desired
 *    destination. It returns when there's no more buffers left in
 *    the pipe or if the requested number of bytes (@sd->total_len)
 *    have been copied. It returns a positive number (one) if the
 *    pipe needs to be filled with more data, zero if the required
 *    number of bytes have been copied and -errno on error.
 *
 *    This, together with splice_from_pipe_{begin,end,next}, may be
 *    used to implement the functionality of __splice_from_pipe() when
 *    locking is required around copying the pipe buffers to the
 *    destination.
801 */ 802int BCMFASTPATH_HOST splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, 803 splice_actor *actor) 804{ 805 int ret; 806 807 while (pipe->nrbufs) { 808 struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; 809 const struct pipe_buf_operations *ops = buf->ops; 810 811 sd->len = buf->len; 812 if (sd->len > sd->total_len) 813 sd->len = sd->total_len; 814 815 ret = actor(pipe, buf, sd); 816 if (ret <= 0) { 817 if (ret == -ENODATA) 818 ret = 0; 819 return ret; 820 } 821 buf->offset += ret; 822 buf->len -= ret; 823 824 sd->num_spliced += ret; 825 sd->len -= ret; 826 sd->pos += ret; 827 sd->total_len -= ret; 828 829 if (!buf->len) { 830 buf->ops = NULL; 831 ops->release(pipe, buf); 832 pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); 833 pipe->nrbufs--; 834 if (pipe->inode) 835 sd->need_wakeup = true; 836 } 837 838 if (!sd->total_len) 839 return 0; 840 } 841 842 return 1; 843} 844EXPORT_SYMBOL(splice_from_pipe_feed); 845 846/** 847 * splice_from_pipe_next - wait for some data to splice from 848 * @pipe: pipe to splice from 849 * @sd: information about the splice operation 850 * 851 * Description: 852 * This function will wait for some data and return a positive 853 * value (one) if pipe buffers are available. It will return zero 854 * or -errno if no more data needs to be spliced. 
855 */ 856int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd) 857{ 858 while (!pipe->nrbufs) { 859 if (!pipe->writers) 860 return 0; 861 862 if (!pipe->waiting_writers && sd->num_spliced) 863 return 0; 864 865 if (sd->flags & SPLICE_F_NONBLOCK) 866 return -EAGAIN; 867 868 if (signal_pending(current)) 869 return -ERESTARTSYS; 870 871 if (sd->need_wakeup) { 872 wakeup_pipe_writers(pipe); 873 sd->need_wakeup = false; 874 } 875 876 pipe_wait(pipe); 877 } 878 879 return 1; 880} 881EXPORT_SYMBOL(splice_from_pipe_next); 882 883/** 884 * splice_from_pipe_begin - start splicing from pipe 885 * @sd: information about the splice operation 886 * 887 * Description: 888 * This function should be called before a loop containing 889 * splice_from_pipe_next() and splice_from_pipe_feed() to 890 * initialize the necessary fields of @sd. 891 */ 892void splice_from_pipe_begin(struct splice_desc *sd) 893{ 894 sd->num_spliced = 0; 895 sd->need_wakeup = false; 896} 897EXPORT_SYMBOL(splice_from_pipe_begin); 898 899/** 900 * splice_from_pipe_end - finish splicing from pipe 901 * @pipe: pipe to splice from 902 * @sd: information about the splice operation 903 * 904 * Description: 905 * This function will wake up pipe writers if necessary. It should 906 * be called after a loop containing splice_from_pipe_next() and 907 * splice_from_pipe_feed(). 908 */ 909void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd) 910{ 911 if (sd->need_wakeup) 912 wakeup_pipe_writers(pipe); 913} 914EXPORT_SYMBOL(splice_from_pipe_end); 915 916/** 917 * __splice_from_pipe - splice data from a pipe to given actor 918 * @pipe: pipe to splice from 919 * @sd: information to @actor 920 * @actor: handler that splices the data 921 * 922 * Description: 923 * This function does little more than loop over the pipe and call 924 * @actor to do the actual moving of a single struct pipe_buffer to 925 * the desired destination. 
See pipe_to_file, pipe_to_sendpage, or 926 * pipe_to_user. 927 * 928 */ 929ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, 930 splice_actor *actor) 931{ 932 int ret; 933 934 splice_from_pipe_begin(sd); 935 do { 936 ret = splice_from_pipe_next(pipe, sd); 937 if (ret > 0) 938 ret = splice_from_pipe_feed(pipe, sd, actor); 939 } while (ret > 0); 940 splice_from_pipe_end(pipe, sd); 941 942 return sd->num_spliced ? sd->num_spliced : ret; 943} 944EXPORT_SYMBOL(__splice_from_pipe); 945 946/** 947 * splice_from_pipe - splice data from a pipe to a file 948 * @pipe: pipe to splice from 949 * @out: file to splice to 950 * @ppos: position in @out 951 * @len: how many bytes to splice 952 * @flags: splice modifier flags 953 * @actor: handler that splices the data 954 * 955 * Description: 956 * See __splice_from_pipe. This function locks the pipe inode, 957 * otherwise it's identical to __splice_from_pipe(). 958 * 959 */ 960ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, 961 loff_t *ppos, size_t len, unsigned int flags, 962 splice_actor *actor) 963{ 964 ssize_t ret; 965 struct splice_desc sd = { 966 .total_len = len, 967 .flags = flags, 968 .pos = *ppos, 969 .u.file = out, 970 }; 971 972 pipe_lock(pipe); 973 ret = __splice_from_pipe(pipe, &sd, actor); 974 pipe_unlock(pipe); 975 976 return ret; 977} 978 979/** 980 * generic_file_splice_write - splice data from a pipe to a file 981 * @pipe: pipe info 982 * @out: file to write to 983 * @ppos: position in @out 984 * @len: number of bytes to splice 985 * @flags: splice modifier flags 986 * 987 * Description: 988 * Will either move or copy pages (determined by @flags options) from 989 * the given pipe inode to the given file. 
990 * 991 */ 992ssize_t 993generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, 994 loff_t *ppos, size_t len, unsigned int flags) 995{ 996 struct address_space *mapping = out->f_mapping; 997 struct inode *inode = mapping->host; 998 struct splice_desc sd = { 999 .total_len = len, 1000 .flags = flags, 1001 .pos = *ppos, 1002 .u.file = out, 1003 }; 1004 ssize_t ret; 1005 1006 pipe_lock(pipe); 1007 1008 splice_from_pipe_begin(&sd); 1009 do { 1010 ret = splice_from_pipe_next(pipe, &sd); 1011 if (ret <= 0) 1012 break; 1013 1014 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); 1015 ret = file_remove_suid(out); 1016 if (!ret) { 1017 file_update_time(out); 1018 ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file); 1019 } 1020 mutex_unlock(&inode->i_mutex); 1021 } while (ret > 0); 1022 splice_from_pipe_end(pipe, &sd); 1023 1024 pipe_unlock(pipe); 1025 1026 if (sd.num_spliced) 1027 ret = sd.num_spliced; 1028 1029 if (ret > 0) { 1030 unsigned long nr_pages; 1031 int err; 1032 1033 nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 1034 1035 err = generic_write_sync(out, *ppos, ret); 1036 if (err) 1037 ret = err; 1038 else 1039 *ppos += ret; 1040 balance_dirty_pages_ratelimited_nr(mapping, nr_pages); 1041 } 1042 1043 return ret; 1044} 1045 1046EXPORT_SYMBOL(generic_file_splice_write); 1047 1048static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf, 1049 struct splice_desc *sd) 1050{ 1051 int ret; 1052 void *data; 1053 1054 ret = buf->ops->confirm(pipe, buf); 1055 if (ret) 1056 return ret; 1057 1058 data = buf->ops->map(pipe, buf, 0); 1059 ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos); 1060 buf->ops->unmap(pipe, buf, data); 1061 1062 return ret; 1063} 1064 1065static ssize_t default_file_splice_write(struct pipe_inode_info *pipe, 1066 struct file *out, loff_t *ppos, 1067 size_t len, unsigned int flags) 1068{ 1069 ssize_t ret; 1070 1071 ret = splice_from_pipe(pipe, out, ppos, len, flags, 
			       write_pipe_buf);
	if (ret > 0)
		*ppos += ret;

	return ret;
}

/**
 * generic_splice_sendpage - splice data from a pipe to a socket
 * @pipe:	pipe to splice from
 * @out:	socket to write to
 * @ppos:	position in @out
 * @len:	number of bytes to splice
 * @flags:	splice modifier flags
 *
 * Description:
 *    Will send @len bytes from the pipe to a network socket. No data copying
 *    is involved.
 *
 */
ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
				loff_t *ppos, size_t len, unsigned int flags)
{
	return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage);
}

EXPORT_SYMBOL(generic_splice_sendpage);

/*
 * Attempt to initiate a splice from pipe to file.
 */
static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
			   loff_t *ppos, size_t len, unsigned int flags)
{
	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
				loff_t *, size_t, unsigned int);
	int ret;

	if (unlikely(!(out->f_mode & FMODE_WRITE)))
		return -EBADF;

	/* Splicing to an O_APPEND file is not supported. */
	if (unlikely(out->f_flags & O_APPEND))
		return -EINVAL;

	ret = rw_verify_area(WRITE, out, ppos, len);
	if (unlikely(ret < 0))
		return ret;

	if (out->f_op && out->f_op->splice_write)
		splice_write = out->f_op->splice_write;
	else
		splice_write = default_file_splice_write;

	return splice_write(pipe, out, ppos, len, flags);
}

/*
 * Attempt to initiate a splice from a file to a pipe.
 */
static long do_splice_to(struct file *in, loff_t *ppos,
			 struct pipe_inode_info *pipe, size_t len,
			 unsigned int flags)
{
	ssize_t (*splice_read)(struct file *, loff_t *,
			       struct pipe_inode_info *, size_t, unsigned int);
	int ret;

	if (unlikely(!(in->f_mode & FMODE_READ)))
		return -EBADF;

	ret = rw_verify_area(READ, in, ppos, len);
	if (unlikely(ret < 0))
		return ret;

	if (in->f_op && in->f_op->splice_read)
		splice_read = in->f_op->splice_read;
	else
		splice_read = default_file_splice_read;

	return splice_read(in, ppos, pipe, len, flags);
}

/**
 * splice_direct_to_actor - splices data directly between two non-pipes
 * @in:		file to splice from
 * @sd:		actor information on where to splice to
 * @actor:	handles the data splicing
 *
 * Description:
 *    This is a special case helper to splice directly between two
 *    points, without requiring an explicit pipe. Internally an allocated
 *    pipe is cached in the process, and reused during the lifetime of
 *    that process.
 *
 */
ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
			       splice_direct_actor *actor)
{
	struct pipe_inode_info *pipe;
	long ret, bytes;
	umode_t i_mode;
	size_t len;
	int i, flags;

	/*
	 * We require the input being a regular file, as we don't want to
	 * randomly drop data for eg socket -> socket splicing. Use the
	 * piped splicing for that!
	 */
	i_mode = in->f_path.dentry->d_inode->i_mode;
	if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode)))
		return -EINVAL;

	/*
	 * neither in nor out is a pipe, setup an internal pipe attached to
	 * 'out' and transfer the wanted data from 'in' to 'out' through that
	 */
	pipe = current->splice_pipe;
	if (unlikely(!pipe)) {
		pipe = alloc_pipe_info(NULL);
		if (!pipe)
			return -ENOMEM;

		/*
		 * We don't have an immediate reader, but we'll read the stuff
		 * out of the pipe right after the splice_to_pipe(). So set
		 * PIPE_READERS appropriately.
		 */
		pipe->readers = 1;

		current->splice_pipe = pipe;
	}

	/*
	 * Do the splice.
	 */
	ret = 0;
	bytes = 0;
	len = sd->total_len;
	flags = sd->flags;

	/*
	 * Don't block on output, we have to drain the direct pipe.
	 */
	sd->flags &= ~SPLICE_F_NONBLOCK;

	while (len) {
		size_t read_len;
		loff_t pos = sd->pos, prev_pos = pos;

		ret = do_splice_to(in, &pos, pipe, len, flags);
		if (unlikely(ret <= 0))
			goto out_release;

		read_len = ret;
		sd->total_len = read_len;

		/*
		 * NOTE: nonblocking mode only applies to the input. We
		 * must not do the output in nonblocking mode as then we
		 * could get stuck data in the internal pipe:
		 */
		ret = actor(pipe, sd);
		if (unlikely(ret <= 0)) {
			sd->pos = prev_pos;
			goto out_release;
		}

		bytes += ret;
		len -= ret;
		sd->pos = pos;

		/* Partial output: rewind to what was actually consumed. */
		if (ret < read_len) {
			sd->pos = prev_pos + ret;
			goto out_release;
		}
	}

done:
	pipe->nrbufs = pipe->curbuf = 0;
	file_accessed(in);
	return bytes;

out_release:
	/*
	 * If we did an incomplete transfer we must release
	 * the pipe buffers in question:
	 */
	for (i = 0; i < pipe->buffers; i++) {
		struct pipe_buffer *buf = pipe->bufs + i;

		if (buf->ops) {
			buf->ops->release(pipe, buf);
			buf->ops = NULL;
		}
	}

	if (!bytes)
		bytes = ret;

	goto done;
}
EXPORT_SYMBOL(splice_direct_to_actor);

/* Actor for do_splice_direct(): splice the internal pipe to the output file. */
static int direct_splice_actor(struct pipe_inode_info *pipe,
			       struct splice_desc *sd)
{
	struct file *file = sd->u.file;

	return do_splice_from(pipe, file, &file->f_pos, sd->total_len,
			      sd->flags);
}

/**
 * do_splice_direct - splices data directly between two files
 * @in:		file to splice from
 * @ppos:	input file offset
 * @out:	file to splice to
 * @len:	number of bytes to splice
 * @flags:	splice modifier flags
 *
 * Description:
 *    For use by do_sendfile(). splice can easily emulate sendfile, but
 *    doing it in the application would incur an extra system call
 *    (splice in + splice out, as compared to just sendfile()). So this helper
 *    can splice directly through a process-private pipe.
1297 * 1298 */ 1299long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, 1300 size_t len, unsigned int flags) 1301{ 1302 struct splice_desc sd = { 1303 .len = len, 1304 .total_len = len, 1305 .flags = flags, 1306 .pos = *ppos, 1307 .u.file = out, 1308 }; 1309 long ret; 1310 1311 ret = splice_direct_to_actor(in, &sd, direct_splice_actor); 1312 if (ret > 0) 1313 *ppos = sd.pos; 1314 1315 return ret; 1316} 1317 1318static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, 1319 struct pipe_inode_info *opipe, 1320 size_t len, unsigned int flags); 1321 1322/* 1323 * Determine where to splice to/from. 1324 */ 1325static long do_splice(struct file *in, loff_t __user *off_in, 1326 struct file *out, loff_t __user *off_out, 1327 size_t len, unsigned int flags) 1328{ 1329 struct pipe_inode_info *ipipe; 1330 struct pipe_inode_info *opipe; 1331 loff_t offset, *off; 1332 long ret; 1333 1334 ipipe = get_pipe_info(in); 1335 opipe = get_pipe_info(out); 1336 1337 if (ipipe && opipe) { 1338 if (off_in || off_out) 1339 return -ESPIPE; 1340 1341 if (!(in->f_mode & FMODE_READ)) 1342 return -EBADF; 1343 1344 if (!(out->f_mode & FMODE_WRITE)) 1345 return -EBADF; 1346 1347 /* Splicing to self would be fun, but... 
*/ 1348 if (ipipe == opipe) 1349 return -EINVAL; 1350 1351 return splice_pipe_to_pipe(ipipe, opipe, len, flags); 1352 } 1353 1354 if (ipipe) { 1355 if (off_in) 1356 return -ESPIPE; 1357 if (off_out) { 1358 if (!(out->f_mode & FMODE_PWRITE)) 1359 return -EINVAL; 1360 if (copy_from_user(&offset, off_out, sizeof(loff_t))) 1361 return -EFAULT; 1362 off = &offset; 1363 } else 1364 off = &out->f_pos; 1365 1366 ret = do_splice_from(ipipe, out, off, len, flags); 1367 1368 if (off_out && copy_to_user(off_out, off, sizeof(loff_t))) 1369 ret = -EFAULT; 1370 1371 return ret; 1372 } 1373 1374 if (opipe) { 1375 if (off_out) 1376 return -ESPIPE; 1377 if (off_in) { 1378 if (!(in->f_mode & FMODE_PREAD)) 1379 return -EINVAL; 1380 if (copy_from_user(&offset, off_in, sizeof(loff_t))) 1381 return -EFAULT; 1382 off = &offset; 1383 } else 1384 off = &in->f_pos; 1385 1386 ret = do_splice_to(in, off, opipe, len, flags); 1387 1388 if (off_in && copy_to_user(off_in, off, sizeof(loff_t))) 1389 ret = -EFAULT; 1390 1391 return ret; 1392 } 1393 1394 return -EINVAL; 1395} 1396 1397#if defined(CONFIG_BCM_RECVFILE) 1398/* Copy data directly from socket to file(pagecache) */ 1399static ssize_t BCMFASTPATH_HOST do_splice_from_socket(struct file *file, struct socket *sock, 1400 loff_t __user *off_out, size_t count) 1401{ 1402 struct address_space *mapping = file->f_mapping; 1403 struct inode *inode = mapping->host; 1404 loff_t pos, start_pos; 1405 int count_tmp, copied_bytes; 1406 int err = 0; 1407 int idx; 1408 int cPagesAllocated = 0; 1409 struct recvfile_ctl_blk *rv_cb; 1410 struct kvec *iov; 1411 struct msghdr msg; 1412 long rcvtimeo; 1413 int ret; 1414 1415 if (count > MAX_PAGES_PER_RECVFILE * PAGE_SIZE) { 1416 printk(KERN_WARNING "%s: count(%u) exceeds maxinum\n", __func__, count); 1417 return -EINVAL; 1418 } 1419 1420 if (off_out) { 1421 if (copy_from_user(&start_pos, off_out, sizeof(loff_t))) 1422 return -EFAULT; 1423 } else { 1424 return -EINVAL; 1425 } 1426 1427 pos = start_pos; 1428 1429 
rv_cb = kmalloc(MAX_PAGES_PER_RECVFILE * sizeof(struct recvfile_ctl_blk), GFP_KERNEL); 1430 if (!rv_cb) { 1431 printk(KERN_WARNING "%s:memory allocation for rcv_cb failed\n", __func__); 1432 return -ENOMEM; 1433 } 1434 1435 iov = kmalloc(MAX_PAGES_PER_RECVFILE * sizeof(struct kvec), GFP_KERNEL); 1436 if (!iov) { 1437 kfree(rv_cb); 1438 printk(KERN_WARNING "%s:memory allocation for iov failed\n", __func__); 1439 return -ENOMEM; 1440 } 1441 1442 mutex_lock(&inode->i_mutex); 1443 1444 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); 1445 1446 /* We can write back this queue in page reclaim */ 1447 current->backing_dev_info = mapping->backing_dev_info; 1448 1449 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); 1450 if (err != 0 || count == 0) 1451 goto done; 1452 1453 file_remove_suid(file); 1454 file_update_time(file); 1455 1456 count_tmp = count; 1457 do { 1458 unsigned long bytes; /* Bytes to write to page */ 1459 unsigned long offset; /* Offset into pagecache page */ 1460 struct page *pageP; 1461 void *fsdata; 1462 1463 offset = (pos & (PAGE_CACHE_SIZE - 1)); 1464 bytes = PAGE_CACHE_SIZE - offset; 1465 if (bytes > count_tmp) 1466 bytes = count_tmp; 1467 1468 ret = mapping->a_ops->write_begin(file, mapping, pos, bytes, 1469 AOP_FLAG_UNINTERRUPTIBLE, 1470 &pageP, &fsdata); 1471 1472 if (unlikely(ret)) { 1473 err = ret; 1474 for (idx = 0; idx < cPagesAllocated; idx++) { 1475 kunmap(rv_cb[idx].rv_page); 1476 ret = mapping->a_ops->write_end(file, mapping, 1477 rv_cb[idx].rv_pos, 1478 rv_cb[idx].rv_count, 1479 0, 1480 rv_cb[idx].rv_page, 1481 rv_cb[idx].rv_fsdata); 1482 } 1483 goto done; 1484 } 1485 rv_cb[cPagesAllocated].rv_page = pageP; 1486 rv_cb[cPagesAllocated].rv_pos = pos; 1487 rv_cb[cPagesAllocated].rv_count = bytes; 1488 rv_cb[cPagesAllocated].rv_fsdata = fsdata; 1489 iov[cPagesAllocated].iov_base = kmap(pageP) + offset; 1490 iov[cPagesAllocated].iov_len = bytes; 1491 cPagesAllocated++; 1492 count_tmp -= bytes; 1493 pos += bytes; 1494 } 
while (count_tmp); 1495 1496 /* IOV is ready, receive the data from socket now */ 1497 msg.msg_name = NULL; 1498 msg.msg_namelen = 0; 1499 msg.msg_iov = (struct iovec *)&iov[0]; 1500 msg.msg_iovlen = cPagesAllocated ; 1501 msg.msg_control = NULL; 1502 msg.msg_controllen = 0; 1503 msg.msg_flags = MSG_KERNSPACE; 1504 rcvtimeo = sock->sk->sk_rcvtimeo; 1505 sock->sk->sk_rcvtimeo = 8 * HZ; 1506 1507 ret = kernel_recvmsg(sock, &msg, &iov[0], cPagesAllocated, count, 1508 MSG_WAITALL | MSG_NOCATCHSIG); 1509 1510 sock->sk->sk_rcvtimeo = rcvtimeo; 1511 1512 if (unlikely(ret != count)) { 1513 if (ret < 0) { 1514 err = -EPIPE; 1515 count = 0; 1516 } else { 1517 /* We have read some data from socket */ 1518 count = ret; 1519 } 1520 } else { 1521 err = 0; 1522 } 1523 1524 /* Adjust the pagecache pages len based on the amount of data copied 1525 * truncate the pages which are not used 1526 */ 1527 count_tmp = count; 1528 1529 for (idx=0; idx < cPagesAllocated; idx++) { 1530 if (count_tmp) { 1531 copied_bytes = min(rv_cb[idx].rv_count, (unsigned int)count_tmp); 1532 count_tmp -= copied_bytes; 1533 } else { 1534 copied_bytes = 0; 1535 } 1536 1537 kunmap(rv_cb[idx].rv_page); 1538 ret = mapping->a_ops->write_end(file, mapping, 1539 rv_cb[idx].rv_pos, 1540 rv_cb[idx].rv_count, 1541 copied_bytes, 1542 rv_cb[idx].rv_page, 1543 rv_cb[idx].rv_fsdata); 1544 1545 if (unlikely(ret < 0)) { 1546 printk(KERN_WARNING"%s: write_end fail,ret = %d\n", __func__, ret); 1547 } 1548 } 1549 1550 if (count) { 1551 balance_dirty_pages_ratelimited_nr(mapping, cPagesAllocated); 1552 } 1553 1554 /* Fix pos based on returned bytes from recvmsg */ 1555 pos = start_pos + count; 1556 if (off_out && copy_to_user(off_out, &pos, sizeof(loff_t))) 1557 ret = -EFAULT; 1558 1559done: 1560 current->backing_dev_info = NULL; 1561 mutex_unlock(&inode->i_mutex); 1562 1563 kfree(rv_cb); 1564 kfree(iov); 1565 1566 if (err) 1567 return err; 1568 else 1569 return count; 1570} 1571#endif /* CONFIG_BCM_RECVFILE */ 1572 1573/* 
 * Map an iov into an array of pages and offset/length tupples. With the
 * partial_page structure, we can map several non-contiguous ranges into
 * our ones pages[] map instead of splitting that operation into pieces.
 * Could easily be exported as a generic helper for other users, in which
 * case one would probably want to add a 'max_nr_pages' parameter as well.
 *
 * Returns the number of partial-page entries filled, or a negative errno
 * if nothing was mapped at all.
 */
static int get_iovec_page_array(const struct iovec __user *iov,
				unsigned int nr_vecs, struct page **pages,
				struct partial_page *partial, int aligned,
				unsigned int pipe_buffers)
{
	int buffers = 0, error = 0;

	while (nr_vecs) {
		unsigned long off, npages;
		struct iovec entry;
		void __user *base;
		size_t len;
		int i;

		error = -EFAULT;
		if (copy_from_user(&entry, iov, sizeof(entry)))
			break;

		base = entry.iov_base;
		len = entry.iov_len;

		/*
		 * Sanity check this iovec. 0 read succeeds.
		 */
		error = 0;
		if (unlikely(!len))
			break;
		error = -EFAULT;
		if (!access_ok(VERIFY_READ, base, len))
			break;

		/*
		 * Get this base offset and number of pages, then map
		 * in the user pages.
		 */
		off = (unsigned long) base & ~PAGE_MASK;

		/*
		 * If asked for alignment, the offset must be zero and the
		 * length a multiple of the PAGE_SIZE.
		 */
		error = -EINVAL;
		if (aligned && (off || len & ~PAGE_MASK))
			break;

		npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
		/* Never map more pages than we have pipe slots left for */
		if (npages > pipe_buffers - buffers)
			npages = pipe_buffers - buffers;

		error = get_user_pages_fast((unsigned long)base, npages,
					    0, &pages[buffers]);

		if (unlikely(error <= 0))
			break;

		/*
		 * Fill this contiguous range into the partial page map.
		 */
		for (i = 0; i < error; i++) {
			const int plen = min_t(size_t, len, PAGE_SIZE - off);

			partial[buffers].offset = off;
			partial[buffers].len = plen;

			/* Only the first page of a range has an offset */
			off = 0;
			len -= plen;
			buffers++;
		}

		/*
		 * We didn't complete this iov, stop here since it probably
		 * means we have to move some of this into a pipe to
		 * be able to continue.
		 */
		if (len)
			break;

		/*
		 * Don't continue if we mapped fewer pages than we asked for,
		 * or if we mapped the max number of pages that we have
		 * room for.
		 */
		if (error < npages || buffers == pipe_buffers)
			break;

		nr_vecs--;
		iov++;
	}

	if (buffers)
		return buffers;

	return error;
}

/*
 * splice_desc actor: copy one pipe buffer out to the user address held in
 * sd->u.userptr, trying an atomic kmap copy first and falling back to the
 * sleeping map + copy_to_user() path. Advances sd->u.userptr on success.
 */
static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
			struct splice_desc *sd)
{
	char *src;
	int ret;

	ret = buf->ops->confirm(pipe, buf);
	if (unlikely(ret))
		return ret;

	/*
	 * See if we can use the atomic maps, by prefaulting in the
	 * pages and doing an atomic copy
	 */
	if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) {
		src = buf->ops->map(pipe, buf, 1);
		ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset,
					      sd->len);
		buf->ops->unmap(pipe, buf, src);
		if (!ret) {
			ret = sd->len;
			goto out;
		}
	}

	/*
	 * No dice, use slow non-atomic map and copy
	 */
	src = buf->ops->map(pipe, buf, 0);

	ret = sd->len;
	if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len))
		ret = -EFAULT;

	buf->ops->unmap(pipe, buf, src);
out:
	if (ret > 0)
		sd->u.userptr += ret;
	return ret;
}

/*
 * For lack of a better implementation, implement vmsplice() to userspace
 * as a simple copy of the pipes pages to the user iov.
 */
/*
 * Drain pipe contents into the user iov, one iovec at a time, under the
 * pipe lock. Returns bytes copied, an error if nothing was copied, and
 * stops early on a short copy or an invalid/zero-length iovec.
 */
static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
			     unsigned long nr_segs, unsigned int flags)
{
	struct pipe_inode_info *pipe;
	struct splice_desc sd;
	ssize_t size;
	int error;
	long ret;

	pipe = get_pipe_info(file);
	if (!pipe)
		return -EBADF;

	pipe_lock(pipe);

	error = ret = 0;
	while (nr_segs) {
		void __user *base;
		size_t len;

		/*
		 * Get user address base and length for this iovec.
		 */
		error = get_user(base, &iov->iov_base);
		if (unlikely(error))
			break;
		error = get_user(len, &iov->iov_len);
		if (unlikely(error))
			break;

		/*
		 * Sanity check this iovec. 0 read succeeds.
		 */
		if (unlikely(!len))
			break;
		if (unlikely(!base)) {
			error = -EFAULT;
			break;
		}

		if (unlikely(!access_ok(VERIFY_WRITE, base, len))) {
			error = -EFAULT;
			break;
		}

		sd.len = 0;
		sd.total_len = len;
		sd.flags = flags;
		sd.u.userptr = base;
		sd.pos = 0;

		size = __splice_from_pipe(pipe, &sd, pipe_to_user);
		if (size < 0) {
			/* Only report the error if nothing was copied yet */
			if (!ret)
				ret = size;

			break;
		}

		ret += size;

		/* Short copy: pipe is drained, no point trying more iovecs */
		if (size < len)
			break;

		nr_segs--;
		iov++;
	}

	pipe_unlock(pipe);

	if (!ret)
		ret = error;

	return ret;
}

/*
 * vmsplice splices a user address range into a pipe. It can be thought of
 * as splice-from-memory, where the regular splice is splice-from-file (or
 * to file). In both cases the output is a pipe, naturally.
 */
/*
 * Map the user iov's pages and link them into the pipe as gift/user-page
 * buffers. splice_grow_spd()/splice_shrink_spd() bracket the operation so
 * the spd arrays match the pipe's (possibly resized) buffer count.
 */
static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
			     unsigned long nr_segs, unsigned int flags)
{
	struct pipe_inode_info *pipe;
	struct page *pages[PIPE_DEF_BUFFERS];
	struct partial_page partial[PIPE_DEF_BUFFERS];
	struct splice_pipe_desc spd = {
		.pages = pages,
		.partial = partial,
		.flags = flags,
		.ops = &user_page_pipe_buf_ops,
		.spd_release = spd_release_page,
	};
	long ret;

	pipe = get_pipe_info(file);
	if (!pipe)
		return -EBADF;

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

	spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages,
					    spd.partial, flags & SPLICE_F_GIFT,
					    pipe->buffers);
	if (spd.nr_pages <= 0)
		ret = spd.nr_pages;
	else
		ret = splice_to_pipe(pipe, &spd);

	splice_shrink_spd(pipe, &spd);
	return ret;
}

/*
 * Note that vmsplice only really supports true splicing _from_ user memory
 * to a pipe, not the other way around. Splicing from user memory is a simple
 * operation that can be supported without any funky alignment restrictions
 * or nasty vm tricks. We simply map in the user memory and fill them into
 * a pipe. The reverse isn't quite as easy, though. There are two possible
 * solutions for that:
 *
 *	- memcpy() the data internally, at which point we might as well just
 *	  do a regular read() on the buffer anyway.
 *	- Lots of nasty vm tricks, that are neither fast nor flexible (it
 *	  has restriction limitations on both ends of the pipe).
 *
 * Currently we punt and implement it as a normal copy, see pipe_to_user().
1849 * 1850 */ 1851SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov, 1852 unsigned long, nr_segs, unsigned int, flags) 1853{ 1854 struct file *file; 1855 long error; 1856 int fput; 1857 1858 if (unlikely(nr_segs > UIO_MAXIOV)) 1859 return -EINVAL; 1860 else if (unlikely(!nr_segs)) 1861 return 0; 1862 1863 error = -EBADF; 1864 file = fget_light(fd, &fput); 1865 if (file) { 1866 if (file->f_mode & FMODE_WRITE) 1867 error = vmsplice_to_pipe(file, iov, nr_segs, flags); 1868 else if (file->f_mode & FMODE_READ) 1869 error = vmsplice_to_user(file, iov, nr_segs, flags); 1870 1871 fput_light(file, fput); 1872 } 1873 1874 return error; 1875} 1876 1877SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, 1878 int, fd_out, loff_t __user *, off_out, 1879 size_t, len, unsigned int, flags) 1880{ 1881 long error; 1882 struct file *in, *out; 1883 int fput_in, fput_out; 1884 1885 if (unlikely(!len)) 1886 return 0; 1887 1888 error = -EBADF; 1889 1890#if defined(CONFIG_BCM_RECVFILE) 1891 /* If input is socket and output is file try to copy from socket to file directly */ 1892 { 1893 struct socket *sock = NULL; 1894 1895 /* Check if fd_in is a socket */ 1896 sock = sockfd_lookup(fd_in, (int *)&error); 1897 if (sock) { 1898 out = NULL; 1899 if (!sock->sk) 1900 goto done; 1901 1902 out = fget_light(fd_out, &fput_out); 1903 1904 if (out) { 1905 struct pipe_inode_info *opipe; 1906 1907 opipe = get_pipe_info(out); 1908 if (opipe) { 1909 /* Output is pipe go regular processing */ 1910 printk(KERN_WARNING "out_fd is a pipe\n"); 1911 goto regular_proc; 1912 } 1913 1914 if (!(out->f_mode & FMODE_WRITE)) 1915 goto done; 1916 1917 if ((out->f_op && out->f_op->splice_write)) { 1918 ssize_t (*splice_from_socket)(struct file *, struct socket *, loff_t __user *, size_t); 1919 1920 splice_from_socket = out->f_op->splice_write_from_socket; 1921 if(!splice_from_socket) 1922 splice_from_socket = do_splice_from_socket; 1923 error = splice_from_socket(out, sock, off_out, len); 
1924 } else { 1925 /* Splice from socket->file not supported */ 1926 error = -EBADF; 1927 } 1928 } 1929done: 1930 if (out) 1931 fput_light(out, fput_out); 1932 fput(sock->file); 1933 return error; 1934 1935regular_proc: 1936 if (out) 1937 fput_light(out, fput_out); 1938 fput(sock->file); 1939 } 1940 } 1941#endif /* CONFIG_BCM_RECVFILE */ 1942 1943 in = fget_light(fd_in, &fput_in); 1944 if (in) { 1945 if (in->f_mode & FMODE_READ) { 1946 out = fget_light(fd_out, &fput_out); 1947 if (out) { 1948 if (out->f_mode & FMODE_WRITE) 1949 error = do_splice(in, off_in, 1950 out, off_out, 1951 len, flags); 1952 fput_light(out, fput_out); 1953 } 1954 } 1955 1956 fput_light(in, fput_in); 1957 } 1958 1959 return error; 1960} 1961 1962/* 1963 * Make sure there's data to read. Wait for input if we can, otherwise 1964 * return an appropriate error. 1965 */ 1966static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) 1967{ 1968 int ret; 1969 1970 /* 1971 * Check ->nrbufs without the inode lock first. This function 1972 * is speculative anyways, so missing one is ok. 1973 */ 1974 if (pipe->nrbufs) 1975 return 0; 1976 1977 ret = 0; 1978 pipe_lock(pipe); 1979 1980 while (!pipe->nrbufs) { 1981 if (signal_pending(current)) { 1982 ret = -ERESTARTSYS; 1983 break; 1984 } 1985 if (!pipe->writers) 1986 break; 1987 if (!pipe->waiting_writers) { 1988 if (flags & SPLICE_F_NONBLOCK) { 1989 ret = -EAGAIN; 1990 break; 1991 } 1992 } 1993 pipe_wait(pipe); 1994 } 1995 1996 pipe_unlock(pipe); 1997 return ret; 1998} 1999 2000/* 2001 * Make sure there's writeable room. Wait for room if we can, otherwise 2002 * return an appropriate error. 2003 */ 2004static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) 2005{ 2006 int ret; 2007 2008 /* 2009 * Check ->nrbufs without the inode lock first. This function 2010 * is speculative anyways, so missing one is ok. 
2011 */ 2012 if (pipe->nrbufs < pipe->buffers) 2013 return 0; 2014 2015 ret = 0; 2016 pipe_lock(pipe); 2017 2018 while (pipe->nrbufs >= pipe->buffers) { 2019 if (!pipe->readers) { 2020 send_sig(SIGPIPE, current, 0); 2021 ret = -EPIPE; 2022 break; 2023 } 2024 if (flags & SPLICE_F_NONBLOCK) { 2025 ret = -EAGAIN; 2026 break; 2027 } 2028 if (signal_pending(current)) { 2029 ret = -ERESTARTSYS; 2030 break; 2031 } 2032 pipe->waiting_writers++; 2033 pipe_wait(pipe); 2034 pipe->waiting_writers--; 2035 } 2036 2037 pipe_unlock(pipe); 2038 return ret; 2039} 2040 2041/* 2042 * Splice contents of ipipe to opipe. 2043 */ 2044static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, 2045 struct pipe_inode_info *opipe, 2046 size_t len, unsigned int flags) 2047{ 2048 struct pipe_buffer *ibuf, *obuf; 2049 int ret = 0, nbuf; 2050 bool input_wakeup = false; 2051 2052 2053retry: 2054 ret = ipipe_prep(ipipe, flags); 2055 if (ret) 2056 return ret; 2057 2058 ret = opipe_prep(opipe, flags); 2059 if (ret) 2060 return ret; 2061 2062 pipe_double_lock(ipipe, opipe); 2063 2064 do { 2065 if (!opipe->readers) { 2066 send_sig(SIGPIPE, current, 0); 2067 if (!ret) 2068 ret = -EPIPE; 2069 break; 2070 } 2071 2072 if (!ipipe->nrbufs && !ipipe->writers) 2073 break; 2074 2075 /* 2076 * Cannot make any progress, because either the input 2077 * pipe is empty or the output pipe is full. 2078 */ 2079 if (!ipipe->nrbufs || opipe->nrbufs >= opipe->buffers) { 2080 /* Already processed some buffers, break */ 2081 if (ret) 2082 break; 2083 2084 if (flags & SPLICE_F_NONBLOCK) { 2085 ret = -EAGAIN; 2086 break; 2087 } 2088 2089 /* 2090 * We raced with another reader/writer and haven't 2091 * managed to process any buffers. A zero return 2092 * value means EOF, so retry instead. 
2093 */ 2094 pipe_unlock(ipipe); 2095 pipe_unlock(opipe); 2096 goto retry; 2097 } 2098 2099 ibuf = ipipe->bufs + ipipe->curbuf; 2100 nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1); 2101 obuf = opipe->bufs + nbuf; 2102 2103 if (len >= ibuf->len) { 2104 /* 2105 * Simply move the whole buffer from ipipe to opipe 2106 */ 2107 *obuf = *ibuf; 2108 ibuf->ops = NULL; 2109 opipe->nrbufs++; 2110 ipipe->curbuf = (ipipe->curbuf + 1) & (ipipe->buffers - 1); 2111 ipipe->nrbufs--; 2112 input_wakeup = true; 2113 } else { 2114 /* 2115 * Get a reference to this pipe buffer, 2116 * so we can copy the contents over. 2117 */ 2118 ibuf->ops->get(ipipe, ibuf); 2119 *obuf = *ibuf; 2120 2121 /* 2122 * Don't inherit the gift flag, we need to 2123 * prevent multiple steals of this page. 2124 */ 2125 obuf->flags &= ~PIPE_BUF_FLAG_GIFT; 2126 2127 obuf->len = len; 2128 opipe->nrbufs++; 2129 ibuf->offset += obuf->len; 2130 ibuf->len -= obuf->len; 2131 } 2132 ret += obuf->len; 2133 len -= obuf->len; 2134 } while (len); 2135 2136 pipe_unlock(ipipe); 2137 pipe_unlock(opipe); 2138 2139 /* 2140 * If we put data in the output pipe, wakeup any potential readers. 2141 */ 2142 if (ret > 0) { 2143 smp_mb(); 2144 if (waitqueue_active(&opipe->wait)) 2145 wake_up_interruptible(&opipe->wait); 2146 kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN); 2147 } 2148 if (input_wakeup) 2149 wakeup_pipe_writers(ipipe); 2150 2151 return ret; 2152} 2153 2154/* 2155 * Link contents of ipipe to opipe. 2156 */ 2157static int link_pipe(struct pipe_inode_info *ipipe, 2158 struct pipe_inode_info *opipe, 2159 size_t len, unsigned int flags) 2160{ 2161 struct pipe_buffer *ibuf, *obuf; 2162 int ret = 0, i = 0, nbuf; 2163 2164 pipe_double_lock(ipipe, opipe); 2165 2166 do { 2167 if (!opipe->readers) { 2168 send_sig(SIGPIPE, current, 0); 2169 if (!ret) 2170 ret = -EPIPE; 2171 break; 2172 } 2173 2174 /* 2175 * If we have iterated all input buffers or ran out of 2176 * output room, break. 
2177 */ 2178 if (i >= ipipe->nrbufs || opipe->nrbufs >= opipe->buffers) 2179 break; 2180 2181 ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (ipipe->buffers-1)); 2182 nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1); 2183 2184 /* 2185 * Get a reference to this pipe buffer, 2186 * so we can copy the contents over. 2187 */ 2188 ibuf->ops->get(ipipe, ibuf); 2189 2190 obuf = opipe->bufs + nbuf; 2191 *obuf = *ibuf; 2192 2193 /* 2194 * Don't inherit the gift flag, we need to 2195 * prevent multiple steals of this page. 2196 */ 2197 obuf->flags &= ~PIPE_BUF_FLAG_GIFT; 2198 2199 if (obuf->len > len) 2200 obuf->len = len; 2201 2202 opipe->nrbufs++; 2203 ret += obuf->len; 2204 len -= obuf->len; 2205 i++; 2206 } while (len); 2207 2208 /* 2209 * return EAGAIN if we have the potential of some data in the 2210 * future, otherwise just return 0 2211 */ 2212 if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK)) 2213 ret = -EAGAIN; 2214 2215 pipe_unlock(ipipe); 2216 pipe_unlock(opipe); 2217 2218 /* 2219 * If we put data in the output pipe, wakeup any potential readers. 2220 */ 2221 if (ret > 0) { 2222 smp_mb(); 2223 if (waitqueue_active(&opipe->wait)) 2224 wake_up_interruptible(&opipe->wait); 2225 kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN); 2226 } 2227 2228 return ret; 2229} 2230 2231/* 2232 * This is a tee(1) implementation that works on pipes. It doesn't copy 2233 * any data, it simply references the 'in' pages on the 'out' pipe. 2234 * The 'flags' used are the SPLICE_F_* variants, currently the only 2235 * applicable one is SPLICE_F_NONBLOCK. 2236 */ 2237static long do_tee(struct file *in, struct file *out, size_t len, 2238 unsigned int flags) 2239{ 2240 struct pipe_inode_info *ipipe = get_pipe_info(in); 2241 struct pipe_inode_info *opipe = get_pipe_info(out); 2242 int ret = -EINVAL; 2243 2244 /* 2245 * Duplicate the contents of ipipe to opipe without actually 2246 * copying the data. 
2247 */ 2248 if (ipipe && opipe && ipipe != opipe) { 2249 /* 2250 * Keep going, unless we encounter an error. The ipipe/opipe 2251 * ordering doesn't really matter. 2252 */ 2253 ret = ipipe_prep(ipipe, flags); 2254 if (!ret) { 2255 ret = opipe_prep(opipe, flags); 2256 if (!ret) 2257 ret = link_pipe(ipipe, opipe, len, flags); 2258 } 2259 } 2260 2261 return ret; 2262} 2263 2264SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) 2265{ 2266 struct file *in; 2267 int error, fput_in; 2268 2269 if (unlikely(!len)) 2270 return 0; 2271 2272 error = -EBADF; 2273 in = fget_light(fdin, &fput_in); 2274 if (in) { 2275 if (in->f_mode & FMODE_READ) { 2276 int fput_out; 2277 struct file *out = fget_light(fdout, &fput_out); 2278 2279 if (out) { 2280 if (out->f_mode & FMODE_WRITE) 2281 error = do_tee(in, out, len, flags); 2282 fput_light(out, fput_out); 2283 } 2284 } 2285 fput_light(in, fput_in); 2286 } 2287 2288 return error; 2289} 2290