// SPDX-License-Identifier: GPL-2.0
/*
 *	SUCS NET3:
 *
 *	Generic datagram handling routines. These are generic for all
 *	protocols. Possibly a generic IP version on top of these would
 *	make sense. Not tonight however 8-).
 *	This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and
 *	NetROM layer all have identical poll code and mostly
 *	identical recvmsg() code. So we share it here. The poll was
 *	shared before but buried in udp.c so I moved it.
 *
 *	Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old
 *						     udp.c code)
 *
 *	Fixes:
 *		Alan Cox	:	NULL return from skb_peek_copy()
 *					understood
 *		Alan Cox	:	Rewrote skb_read_datagram to avoid the
 *					skb_peek_copy stuff.
 *		Alan Cox	:	Added support for SOCK_SEQPACKET.
 *					IPX can no longer use the SO_TYPE hack
 *					but AX.25 now works right, and SPX is
 *					feasible.
 *		Alan Cox	:	Fixed write poll of non IP protocol
 *					crash.
 *		Florian La Roche:	Changed for my new skbuff handling.
 *		Darryl Miles	:	Fixed non-blocking SOCK_SEQPACKET.
 *		Linus Torvalds	:	BSD semantic fixes.
 *		Alan Cox	:	Datagram iovec handling
 *		Darryl Miles	:	Fixed non-blocking SOCK_STREAM.
 *		Alan Cox	:	POSIXisms
 *		Pete Wyckoff	:	Unconnected accept() fix.
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/poll.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/iov_iter.h>
#include <linux/indirect_call_wrapper.h>

#include <net/protocol.h>
#include <linux/skbuff.h>

#include <net/checksum.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <trace/events/skb.h>
#include <net/busy_poll.h>
#include <crypto/hash.h>

/*
 *	Is a socket 'connection oriented' ?
 */
static inline int connection_based(struct sock *sk)
{
	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
}

static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync,
				  void *key)
{
	/*
	 * Avoid a wakeup if event not interesting for us
	 */
	if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR)))
		return 0;
	return autoremove_wake_function(wait, mode, sync, key);
}
/*
 * Wait for the last received packet to be different from skb
 */
int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue,
				int *err, long *timeo_p,
				const struct sk_buff *skb)
{
	int error;
	DEFINE_WAIT_FUNC(wait, receiver_wake_function);

	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

	/* Socket errors? */
	error = sock_error(sk);
	if (error)
		goto out_err;

	if (READ_ONCE(queue->prev) != skb)
		goto out;

	/* Socket shut down? */
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		goto out_noerr;

	/* Sequenced packets can come disconnected.
	 * If so we report the problem
	 */
	error = -ENOTCONN;
	if (connection_based(sk) &&
	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
		goto out_err;

	/* handle signals */
	if (signal_pending(current))
		goto interrupted;

	error = 0;
	*timeo_p = schedule_timeout(*timeo_p);
out:
	finish_wait(sk_sleep(sk), &wait);
	return error;
interrupted:
	error = sock_intr_errno(*timeo_p);
out_err:
	*err = error;
	goto out;
out_noerr:
	*err = 0;
	error = 1;
	goto out;
}
EXPORT_SYMBOL(__skb_wait_for_more_packets);

static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
{
	struct sk_buff *nskb;

	if (skb->peeked)
		return skb;

	/* We have to unshare an skb before modifying it. */
	if (!skb_shared(skb))
		goto done;

	nskb = skb_clone(skb, GFP_ATOMIC);
	if (!nskb)
		return ERR_PTR(-ENOMEM);

	skb->prev->next = nskb;
	skb->next->prev = nskb;
	nskb->prev = skb->prev;
	nskb->next = skb->next;

	consume_skb(skb);
	skb = nskb;

done:
	skb->peeked = 1;

	return skb;
}

struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
					  struct sk_buff_head *queue,
					  unsigned int flags,
					  int *off, int *err,
					  struct sk_buff **last)
{
	bool peek_at_off = false;
	struct sk_buff *skb;
	int _off = 0;

	if (unlikely(flags & MSG_PEEK && *off >= 0)) {
		peek_at_off = true;
		_off = *off;
	}

	*last = queue->prev;
	skb_queue_walk(queue, skb) {
		if (flags & MSG_PEEK) {
			if (peek_at_off && _off >= skb->len &&
			    (_off || skb->peeked)) {
				_off -= skb->len;
				continue;
			}
			if (!skb->len) {
				skb = skb_set_peeked(skb);
				if (IS_ERR(skb)) {
					*err = PTR_ERR(skb);
					return NULL;
				}
			}
			refcount_inc(&skb->users);
		} else {
			__skb_unlink(skb, queue);
		}
		*off = _off;
		return skb;
	}
	return NULL;
}
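
/*
 * A note on peeking at an offset (MSG_PEEK combined with a non-negative
 * *off, e.g. as set up via SO_PEEK_OFF): datagrams that the offset has
 * already walked past are skipped entirely, and *off comes back as the
 * remaining offset inside the skb that is returned.  For example, with a
 * peek offset of 3 and a 10 byte datagram at the head of the queue, the
 * caller gets that skb with *off == 3 and copies bytes 3..9.
 * (Illustrative summary of the walk above, not additional behaviour.)
 */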

/**
 *	__skb_try_recv_datagram - Receive a datagram skbuff
 *	@sk: socket
 *	@queue: socket queue from which to receive
 *	@flags: MSG\_ flags
 *	@off: an offset in bytes to peek skb from. Returns an offset
 *	      within an skb where data actually starts
 *	@err: error code returned
 *	@last: set to last peeked message to inform the wait function
 *	       what to look for when peeking
 *
 *	Get a datagram skbuff, understands the peeking, nonblocking wakeups
 *	and possible races. This replaces identical code in packet, raw and
 *	udp, as well as the IPX AX.25 and Appletalk. It also finally fixes
 *	the long standing peek and read race for datagram sockets. If you
 *	alter this routine remember it must be re-entrant.
 *
 *	This function will lock the socket if a skb is returned, so
 *	the caller needs to unlock the socket in that case (usually by
 *	calling skb_free_datagram). Returns NULL with @err set to
 *	-EAGAIN if no data was available or to some other value if an
 *	error was detected.
 *
 *	* It does not lock socket since today. This function is
 *	* free of race conditions. This measure should/can improve
 *	* significantly datagram socket latencies at high loads,
 *	* when data copying to user space takes lots of time.
 *	* (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
 *	* 8) Great win.)
 *	*					--ANK (980729)
 *
 *	The order of the tests when we find no data waiting are specified
 *	quite explicitly by POSIX 1003.1g, don't change them without having
 *	the standard around please.
 */
struct sk_buff *__skb_try_recv_datagram(struct sock *sk,
					struct sk_buff_head *queue,
					unsigned int flags, int *off, int *err,
					struct sk_buff **last)
{
	struct sk_buff *skb;
	unsigned long cpu_flags;
	/*
	 * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
	 */
	int error = sock_error(sk);

	if (error)
		goto no_packet;

	do {
		/* Again only user level code calls this function, so nothing
		 * interrupt level will suddenly eat the receive_queue.
		 *
		 * Look at current nfs client by the way...
		 * However, this function was correct in any case. 8)
		 */
		spin_lock_irqsave(&queue->lock, cpu_flags);
		skb = __skb_try_recv_from_queue(sk, queue, flags, off, &error,
						last);
		spin_unlock_irqrestore(&queue->lock, cpu_flags);
		if (error)
			goto no_packet;
		if (skb)
			return skb;

		if (!sk_can_busy_loop(sk))
			break;

		sk_busy_loop(sk, flags & MSG_DONTWAIT);
	} while (READ_ONCE(queue->prev) != *last);

	error = -EAGAIN;

no_packet:
	*err = error;
	return NULL;
}
EXPORT_SYMBOL(__skb_try_recv_datagram);

struct sk_buff *__skb_recv_datagram(struct sock *sk,
				    struct sk_buff_head *sk_queue,
				    unsigned int flags, int *off, int *err)
{
	struct sk_buff *skb, *last;
	long timeo;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		skb = __skb_try_recv_datagram(sk, sk_queue, flags, off, err,
					      &last);
		if (skb)
			return skb;

		if (*err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, sk_queue, err,
					      &timeo, last));

	return NULL;
}
EXPORT_SYMBOL(__skb_recv_datagram);

struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
				  int *err)
{
	int off = 0;

	return __skb_recv_datagram(sk, &sk->sk_receive_queue, flags,
				   &off, err);
}
EXPORT_SYMBOL(skb_recv_datagram);
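
/*
 * Example (purely illustrative sketch): a minimal recvmsg() built on the
 * helpers above, in the style of the simpler datagram protocols.  The
 * function name is hypothetical and nothing here relies on it.
 */
static int __maybe_unused example_dgram_recvmsg(struct socket *sock,
						struct msghdr *msg,
						size_t len, int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	size_t copied;
	int err;

	/* Wait for a datagram, or fail with -EAGAIN under MSG_DONTWAIT. */
	skb = skb_recv_datagram(sk, flags, &err);
	if (!skb)
		return err;

	/* Truncate to the caller's buffer and let them know about it. */
	copied = skb->len;
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	err = skb_copy_datagram_msg(skb, 0, msg, copied);

	/* Some protocols would instead drop a datagram that failed to copy
	 * by calling skb_kill_datagram(sk, skb, flags) here.
	 */
	skb_free_datagram(sk, skb);

	if (err)
		return err;
	return copied;
}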

void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
	consume_skb(skb);
}
EXPORT_SYMBOL(skb_free_datagram);

void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
{
	bool slow;

	if (!skb_unref(skb)) {
		sk_peek_offset_bwd(sk, len);
		return;
	}

	slow = lock_sock_fast(sk);
	sk_peek_offset_bwd(sk, len);
	skb_orphan(skb);
	unlock_sock_fast(sk, slow);

	/* skb is now orphaned, can be freed outside of locked section */
	__kfree_skb(skb);
}
EXPORT_SYMBOL(__skb_free_datagram_locked);

int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
			struct sk_buff *skb, unsigned int flags,
			void (*destructor)(struct sock *sk,
					   struct sk_buff *skb))
{
	int err = 0;

	if (flags & MSG_PEEK) {
		err = -ENOENT;
		spin_lock_bh(&sk_queue->lock);
		if (skb->next) {
			__skb_unlink(skb, sk_queue);
			refcount_dec(&skb->users);
			if (destructor)
				destructor(sk, skb);
			err = 0;
		}
		spin_unlock_bh(&sk_queue->lock);
	}

	atomic_inc(&sk->sk_drops);
	return err;
}
EXPORT_SYMBOL(__sk_queue_drop_skb);

/**
 *	skb_kill_datagram - Free a datagram skbuff forcibly
 *	@sk: socket
 *	@skb: datagram skbuff
 *	@flags: MSG\_ flags
 *
 *	This function frees a datagram skbuff that was received by
 *	skb_recv_datagram. The flags argument must match the one
 *	used for skb_recv_datagram.
 *
 *	If the MSG_PEEK flag is set, and the packet is still on the
 *	receive queue of the socket, it will be taken off the queue
 *	before it is freed.
 *
 *	This function currently only disables BH when acquiring the
 *	sk_receive_queue lock. Therefore it must not be used in a
 *	context where that lock is acquired in an IRQ context.
 *
 *	It returns 0 if the packet was removed by us.
 */

int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
	int err = __sk_queue_drop_skb(sk, &sk->sk_receive_queue, skb, flags,
				      NULL);

	kfree_skb(skb);
	return err;
}
EXPORT_SYMBOL(skb_kill_datagram);

INDIRECT_CALLABLE_DECLARE(static size_t simple_copy_to_iter(const void *addr,
						size_t bytes,
						void *data __always_unused,
						struct iov_iter *i));

static int __skb_datagram_iter(const struct sk_buff *skb, int offset,
			       struct iov_iter *to, int len, bool fault_short,
			       size_t (*cb)(const void *, size_t, void *,
					    struct iov_iter *), void *data)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset, start_off = offset, n;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		n = INDIRECT_CALL_1(cb, simple_copy_to_iter,
				    skb->data + offset, copy, data, to);
		offset += n;
		if (n != copy)
			goto short_copy;
		if ((len -= copy) == 0)
			return 0;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			struct page *page = skb_frag_page(frag);
			u8 *vaddr = kmap(page);

			if (copy > len)
				copy = len;
			n = INDIRECT_CALL_1(cb, simple_copy_to_iter,
					vaddr + skb_frag_off(frag) + offset - start,
					copy, data, to);
			kunmap(page);
			offset += n;
			if (n != copy)
				goto short_copy;
			if (!(len -= copy))
				return 0;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (__skb_datagram_iter(frag_iter, offset - start,
						to, copy, fault_short, cb, data))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

	/* This is not really a user copy fault, but rather someone
	 * gave us a bogus length on the skb. We should probably
	 * print a warning here as it may indicate a kernel bug.
	 */

fault:
	iov_iter_revert(to, offset - start_off);
	return -EFAULT;

short_copy:
	if (fault_short || iov_iter_count(to))
		goto fault;

	return 0;
}

static size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
				    struct iov_iter *i)
{
#ifdef CONFIG_CRYPTO_HASH
	struct ahash_request *hash = hashp;
	struct scatterlist sg;
	size_t copied;

	copied = copy_to_iter(addr, bytes, i);
	sg_init_one(&sg, addr, copied);
	ahash_request_set_crypt(hash, &sg, NULL, copied);
	crypto_ahash_update(hash);
	return copied;
#else
	return 0;
#endif
}

/**
 *	skb_copy_and_hash_datagram_iter - Copy datagram to an iovec iterator
 *	    and update a hash.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: iovec iterator to copy to
 *	@len: amount of data to copy from buffer to iovec
 *	@hash: hash request to update
 */
int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset,
				    struct iov_iter *to, int len,
				    struct ahash_request *hash)
{
	return __skb_datagram_iter(skb, offset, to, len, true,
				   hash_and_copy_to_iter, hash);
}
EXPORT_SYMBOL(skb_copy_and_hash_datagram_iter);

static size_t simple_copy_to_iter(const void *addr, size_t bytes,
				  void *data __always_unused, struct iov_iter *i)
{
	return copy_to_iter(addr, bytes, i);
}

/**
 *	skb_copy_datagram_iter - Copy a datagram to an iovec iterator.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: iovec iterator to copy to
 *	@len: amount of data to copy from buffer to iovec
 */
int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
			   struct iov_iter *to, int len)
{
	trace_skb_copy_datagram_iovec(skb, len);
	return __skb_datagram_iter(skb, offset, to, len, false,
				   simple_copy_to_iter, NULL);
}
EXPORT_SYMBOL(skb_copy_datagram_iter);

/**
 *	skb_copy_datagram_from_iter - Copy a datagram from an iov_iter.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying to
 *	@from: the copy source
 *	@len: amount of data to copy to buffer from iovec
 *
 *	Returns 0 or -EFAULT.
 */
int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
				struct iov_iter *from,
				int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (copy_from_iter(skb->data + offset, copy, from) != copy)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			size_t copied;

			if (copy > len)
				copy = len;
			copied = copy_page_from_iter(skb_frag_page(frag),
					skb_frag_off(frag) + offset - start,
					copy, from);
			if (copied != copy)
				goto fault;

			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_from_iter(frag_iter,
							offset - start,
							from, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_from_iter);

int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
			    struct sk_buff *skb, struct iov_iter *from,
			    size_t length)
{
	int frag;

	if (msg && msg->msg_ubuf && msg->sg_from_iter)
		return msg->sg_from_iter(sk, skb, from, length);

	frag = skb_shinfo(skb)->nr_frags;

	while (length && iov_iter_count(from)) {
		struct page *head, *last_head = NULL;
		struct page *pages[MAX_SKB_FRAGS];
		int refs, order, n = 0;
		size_t start;
		ssize_t copied;
		unsigned long truesize;

		if (frag == MAX_SKB_FRAGS)
			return -EMSGSIZE;

		copied = iov_iter_get_pages2(from, pages, length,
					     MAX_SKB_FRAGS - frag, &start);
		if (copied < 0)
			return -EFAULT;

		length -= copied;

		truesize = PAGE_ALIGN(copied + start);
		skb->data_len += copied;
		skb->len += copied;
		skb->truesize += truesize;
		if (sk && sk->sk_type == SOCK_STREAM) {
			sk_wmem_queued_add(sk, truesize);
			if (!skb_zcopy_pure(skb))
				sk_mem_charge(sk, truesize);
		} else {
			refcount_add(truesize, &skb->sk->sk_wmem_alloc);
		}

		head = compound_head(pages[n]);
		order = compound_order(head);

		for (refs = 0; copied != 0; start = 0) {
			int size = min_t(int, copied, PAGE_SIZE - start);

			if (pages[n] - head > (1UL << order) - 1) {
				head = compound_head(pages[n]);
				order = compound_order(head);
			}

			start += (pages[n] - head) << PAGE_SHIFT;
			copied -= size;
			n++;
			if (frag) {
				skb_frag_t *last = &skb_shinfo(skb)->frags[frag - 1];

				if (head == skb_frag_page(last) &&
				    start == skb_frag_off(last) + skb_frag_size(last)) {
					skb_frag_size_add(last, size);
					/* We combined this page, we need to release
					 * a reference. Since compound pages refcount
					 * is shared among many pages, batch the refcount
					 * adjustments to limit false sharing.
					 */
					last_head = head;
					refs++;
					continue;
				}
			}
			if (refs) {
				page_ref_sub(last_head, refs);
				refs = 0;
			}
			skb_fill_page_desc_noacc(skb, frag++, head, start, size);
		}
		if (refs)
			page_ref_sub(last_head, refs);
	}
	return 0;
}
EXPORT_SYMBOL(__zerocopy_sg_from_iter);

/**
 *	zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
 *	@skb: buffer to copy
 *	@from: the source to copy from
 *
 *	The function will first copy up to headlen, and then pin the userspace
 *	pages and build frags through them.
 *
 *	Returns 0, -EFAULT or -EMSGSIZE.
 */
int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
{
	int copy = min_t(int, skb_headlen(skb), iov_iter_count(from));

	/* copy up to skb headlen */
	if (skb_copy_datagram_from_iter(skb, 0, from, copy))
		return -EFAULT;

	return __zerocopy_sg_from_iter(NULL, NULL, skb, from, ~0U);
}
EXPORT_SYMBOL(zerocopy_sg_from_iter);

static __always_inline
size_t copy_to_user_iter_csum(void __user *iter_to, size_t progress,
			      size_t len, void *from, void *priv2)
{
	__wsum next, *csum = priv2;

	next = csum_and_copy_to_user(from + progress, iter_to, len);
	*csum = csum_block_add(*csum, next, progress);
	return next ? 0 : len;
}

static __always_inline
size_t memcpy_to_iter_csum(void *iter_to, size_t progress,
			   size_t len, void *from, void *priv2)
{
	__wsum *csum = priv2;
	__wsum next = csum_partial_copy_nocheck(from + progress, iter_to, len);

	*csum = csum_block_add(*csum, next, progress);
	return 0;
}

struct csum_state {
	__wsum csum;
	size_t off;
};

static size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
				    struct iov_iter *i)
{
	struct csum_state *csstate = _csstate;
	__wsum sum;

	if (WARN_ON_ONCE(i->data_source))
		return 0;
	if (unlikely(iov_iter_is_discard(i))) {
		// can't use csum_memcpy() for that one - data is not copied
		csstate->csum = csum_block_add(csstate->csum,
					       csum_partial(addr, bytes, 0),
					       csstate->off);
		csstate->off += bytes;
		return bytes;
	}

	sum = csum_shift(csstate->csum, csstate->off);

	bytes = iterate_and_advance2(i, bytes, (void *)addr, &sum,
				     copy_to_user_iter_csum,
				     memcpy_to_iter_csum);
	csstate->csum = csum_shift(sum, csstate->off);
	csstate->off += bytes;
	return bytes;
}

/**
 *	skb_copy_and_csum_datagram - Copy datagram to an iovec iterator
 *	    and update a checksum.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: iovec iterator to copy to
 *	@len: amount of data to copy from buffer to iovec
 *	@csump: checksum pointer
 */
static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
				      struct iov_iter *to, int len,
				      __wsum *csump)
{
	struct csum_state csdata = { .csum = *csump };
	int ret;

	ret = __skb_datagram_iter(skb, offset, to, len, true,
				  csum_and_copy_to_iter, &csdata);
	if (ret)
		return ret;

	*csump = csdata.csum;
	return 0;
}

/**
 *	skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec.
 *	@skb: skbuff
 *	@hlen: hardware length
 *	@msg: destination
 *
 *	Caller _must_ check that skb will fit to this iovec.
 *
 *	Returns: 0	 - success.
 *		 -EINVAL - checksum failure.
 *		 -EFAULT - fault during copy.
 */
int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
				   int hlen, struct msghdr *msg)
{
	__wsum csum;
	int chunk = skb->len - hlen;

	if (!chunk)
		return 0;

	if (msg_data_left(msg) < chunk) {
		if (__skb_checksum_complete(skb))
			return -EINVAL;
		if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
			goto fault;
	} else {
		csum = csum_partial(skb->data, hlen, skb->csum);
		if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter,
					       chunk, &csum))
			goto fault;

		if (csum_fold(csum)) {
			iov_iter_revert(&msg->msg_iter, chunk);
			return -EINVAL;
		}

		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
		    !skb->csum_complete_sw)
			netdev_rx_csum_fault(NULL, skb);
	}
	return 0;
fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);

/**
 *	datagram_poll - generic datagram poll
 *	@file: file struct
 *	@sock: socket
 *	@wait: poll table
 *
 *	Datagram poll: Again totally generic. This also handles
 *	sequenced packet sockets providing the socket receive queue
 *	is only ever holding data ready to receive.
 *
 *	Note: when you *don't* use this routine for this protocol,
 *	and you use a different write policy from sock_writeable()
 *	then please supply your own write_space callback.
 */
__poll_t datagram_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask;
	u8 shutdown;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (READ_ONCE(sk->sk_err) ||
	    !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	shutdown = READ_ONCE(sk->sk_shutdown);
	if (shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (connection_based(sk)) {
		int state = READ_ONCE(sk->sk_state);

		if (state == TCP_CLOSE)
			mask |= EPOLLHUP;
		/* connection hasn't started yet? */
		if (state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	if (sock_writeable(sk))
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
EXPORT_SYMBOL(datagram_poll);