// SPDX-License-Identifier: GPL-2.0
/*
 *	SUCS NET3:
 *
 *	Generic datagram handling routines. These are generic for all
 *	protocols. Possibly a generic IP version on top of these would
 *	make sense. Not tonight however 8-).
 *	This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and
 *	NetROM layer all have identical poll code and mostly
 *	identical recvmsg() code. So we share it here. The poll was
 *	shared before but buried in udp.c so I moved it.
 *
 *	Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old
 *						     udp.c code)
 *
 *	Fixes:
 *		Alan Cox	:	NULL return from skb_peek_copy()
 *					understood
 *		Alan Cox	:	Rewrote skb_read_datagram to avoid the
 *					skb_peek_copy stuff.
 *		Alan Cox	:	Added support for SOCK_SEQPACKET.
 *					IPX can no longer use the SO_TYPE hack
 *					but AX.25 now works right, and SPX is
 *					feasible.
 *		Alan Cox	:	Fixed write poll of non IP protocol
 *					crash.
 *		Florian  La Roche:	Changed for my new skbuff handling.
 *		Darryl Miles	:	Fixed non-blocking SOCK_SEQPACKET.
 *		Linus Torvalds	:	BSD semantic fixes.
 *		Alan Cox	:	Datagram iovec handling
 *		Darryl Miles	:	Fixed non-blocking SOCK_STREAM.
 *		Alan Cox	:	POSIXisms
 *		Pete Wyckoff    :       Unconnected accept() fix.
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/poll.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/iov_iter.h>
#include <linux/indirect_call_wrapper.h>

#include <net/protocol.h>
#include <linux/skbuff.h>

#include <net/checksum.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <trace/events/skb.h>
#include <net/busy_poll.h>
#include <crypto/hash.h>

/*
 *	Is a socket 'connection oriented' ?
 */
static inline int connection_based(struct sock *sk)
{
	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
}

static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync,
				  void *key)
{
	/*
	 * Avoid a wakeup if event not interesting for us
	 */
	if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR)))
		return 0;
	return autoremove_wake_function(wait, mode, sync, key);
}
/*
 * Wait for the last received packet to be different from skb
 */
int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue,
				int *err, long *timeo_p,
				const struct sk_buff *skb)
{
	int error;
	DEFINE_WAIT_FUNC(wait, receiver_wake_function);

	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

	/* Socket errors? */
	error = sock_error(sk);
	if (error)
		goto out_err;

	if (READ_ONCE(queue->prev) != skb)
		goto out;

	/* Socket shut down? */
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		goto out_noerr;

	/* Sequenced packets can come disconnected.
	 * If so we report the problem
	 */
	error = -ENOTCONN;
	if (connection_based(sk) &&
	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
		goto out_err;

	/* handle signals */
	if (signal_pending(current))
		goto interrupted;

	error = 0;
	*timeo_p = schedule_timeout(*timeo_p);
out:
	finish_wait(sk_sleep(sk), &wait);
	return error;
interrupted:
	error = sock_intr_errno(*timeo_p);
out_err:
	*err = error;
	goto out;
out_noerr:
	*err = 0;
	error = 1;
	goto out;
}
EXPORT_SYMBOL(__skb_wait_for_more_packets);

static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
{
	struct sk_buff *nskb;

	if (skb->peeked)
		return skb;

	/* We have to unshare an skb before modifying it. */
	if (!skb_shared(skb))
		goto done;

	nskb = skb_clone(skb, GFP_ATOMIC);
	if (!nskb)
		return ERR_PTR(-ENOMEM);

	skb->prev->next = nskb;
	skb->next->prev = nskb;
	nskb->prev = skb->prev;
	nskb->next = skb->next;

	consume_skb(skb);
	skb = nskb;

done:
	skb->peeked = 1;

	return skb;
}

struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
					  struct sk_buff_head *queue,
					  unsigned int flags,
					  int *off, int *err,
					  struct sk_buff **last)
{
	bool peek_at_off = false;
	struct sk_buff *skb;
	int _off = 0;

	if (unlikely(flags & MSG_PEEK && *off >= 0)) {
		peek_at_off = true;
		_off = *off;
	}

	*last = queue->prev;
	skb_queue_walk(queue, skb) {
		if (flags & MSG_PEEK) {
			if (peek_at_off && _off >= skb->len &&
			    (_off || skb->peeked)) {
				_off -= skb->len;
				continue;
			}
			if (!skb->len) {
				skb = skb_set_peeked(skb);
				if (IS_ERR(skb)) {
					*err = PTR_ERR(skb);
					return NULL;
				}
			}
			refcount_inc(&skb->users);
		} else {
			__skb_unlink(skb, queue);
		}
		*off = _off;
		return skb;
	}
	return NULL;
}

/**
 *	__skb_try_recv_datagram - Receive a datagram skbuff
 *	@sk: socket
 *	@queue: socket queue from which to receive
 *	@flags: MSG\_ flags
 *	@off: an offset in bytes to peek skb from. Returns an offset
 *	      within an skb where data actually starts
 *	@err: error code returned
 *	@last: set to last peeked message to inform the wait function
 *	       what to look for when peeking
 *
 *	Get a datagram skbuff, understands the peeking, nonblocking wakeups
 *	and possible races. This replaces identical code in packet, raw and
 *	udp, as well as the IPX AX.25 and Appletalk. It also finally fixes
 *	the long standing peek and read race for datagram sockets. If you
 *	alter this routine remember it must be re-entrant.
 *
 *	This function will lock the socket if a skb is returned, so
 *	the caller needs to unlock the socket in that case (usually by
 *	calling skb_free_datagram). Returns NULL with @err set to
 *	-EAGAIN if no data was available or to some other value if an
 *	error was detected.
 *
 *	* It does not lock socket since today. This function is
 *	* free of race conditions. This measure should/can improve
 *	* significantly datagram socket latencies at high loads,
 *	* when data copying to user space takes lots of time.
 *	* (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
 *	*  8) Great win.)
 *	*			                    --ANK (980729)
 *
 *	The order of the tests when we find no data waiting are specified
 *	quite explicitly by POSIX 1003.1g, don't change them without having
 *	the standard around please.
 */
struct sk_buff *__skb_try_recv_datagram(struct sock *sk,
					struct sk_buff_head *queue,
					unsigned int flags, int *off, int *err,
					struct sk_buff **last)
{
	struct sk_buff *skb;
	unsigned long cpu_flags;
	/*
	 * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
	 */
	int error = sock_error(sk);

	if (error)
		goto no_packet;

	do {
		/* Again only user level code calls this function, so nothing
		 * interrupt level will suddenly eat the receive_queue.
		 *
		 * Look at current nfs client by the way...
		 * However, this function was correct in any case. 8)
		 */
		spin_lock_irqsave(&queue->lock, cpu_flags);
		skb = __skb_try_recv_from_queue(sk, queue, flags, off, &error,
						last);
		spin_unlock_irqrestore(&queue->lock, cpu_flags);
		if (error)
			goto no_packet;
		if (skb)
			return skb;

		if (!sk_can_busy_loop(sk))
			break;

		sk_busy_loop(sk, flags & MSG_DONTWAIT);
	} while (READ_ONCE(queue->prev) != *last);

	error = -EAGAIN;

no_packet:
	*err = error;
	return NULL;
}
EXPORT_SYMBOL(__skb_try_recv_datagram);

struct sk_buff *__skb_recv_datagram(struct sock *sk,
				    struct sk_buff_head *sk_queue,
				    unsigned int flags, int *off, int *err)
{
	struct sk_buff *skb, *last;
	long timeo;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		skb = __skb_try_recv_datagram(sk, sk_queue, flags, off, err,
					      &last);
		if (skb)
			return skb;

		if (*err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, sk_queue, err,
					      &timeo, last));

	return NULL;
}
EXPORT_SYMBOL(__skb_recv_datagram);

struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
				  int *err)
{
	int off = 0;

	return __skb_recv_datagram(sk, &sk->sk_receive_queue, flags,
				   &off, err);
}
EXPORT_SYMBOL(skb_recv_datagram);
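
/* Example: a minimal sketch of the usual skb_recv_datagram() caller pattern,
 * assuming a hypothetical datagram protocol whose payload starts at offset 0
 * (example_recvmsg is not a real function).  In-tree users such as UDP,
 * AF_PACKET and AF_UNIX datagram sockets follow the same shape but also
 * handle protocol headers, addresses and control messages:
 *
 *	static int example_recvmsg(struct socket *sock, struct msghdr *msg,
 *				   size_t size, int flags)
 *	{
 *		struct sock *sk = sock->sk;
 *		struct sk_buff *skb;
 *		int copied, err;
 *
 *		skb = skb_recv_datagram(sk, flags, &err);
 *		if (!skb)
 *			return err;
 *
 *		copied = skb->len;
 *		if (copied > size) {
 *			copied = size;
 *			msg->msg_flags |= MSG_TRUNC;
 *		}
 *
 *		err = skb_copy_datagram_msg(skb, 0, msg, copied);
 *		skb_free_datagram(sk, skb);
 *		return err ? err : copied;
 *	}
 */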

void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
	consume_skb(skb);
}
EXPORT_SYMBOL(skb_free_datagram);

void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
{
	bool slow;

	if (!skb_unref(skb)) {
		sk_peek_offset_bwd(sk, len);
		return;
	}

	slow = lock_sock_fast(sk);
	sk_peek_offset_bwd(sk, len);
	skb_orphan(skb);
	unlock_sock_fast(sk, slow);

	/* skb is now orphaned, can be freed outside of locked section */
	__kfree_skb(skb);
}
EXPORT_SYMBOL(__skb_free_datagram_locked);

int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
			struct sk_buff *skb, unsigned int flags,
			void (*destructor)(struct sock *sk,
					   struct sk_buff *skb))
{
	int err = 0;

	if (flags & MSG_PEEK) {
		err = -ENOENT;
		spin_lock_bh(&sk_queue->lock);
		if (skb->next) {
			__skb_unlink(skb, sk_queue);
			refcount_dec(&skb->users);
			if (destructor)
				destructor(sk, skb);
			err = 0;
		}
		spin_unlock_bh(&sk_queue->lock);
	}

	atomic_inc(&sk->sk_drops);
	return err;
}
EXPORT_SYMBOL(__sk_queue_drop_skb);

/**
 *	skb_kill_datagram - Free a datagram skbuff forcibly
 *	@sk: socket
 *	@skb: datagram skbuff
 *	@flags: MSG\_ flags
 *
 *	This function frees a datagram skbuff that was received by
 *	skb_recv_datagram.  The flags argument must match the one
 *	used for skb_recv_datagram.
 *
 *	If the MSG_PEEK flag is set, and the packet is still on the
 *	receive queue of the socket, it will be taken off the queue
 *	before it is freed.
 *
 *	This function currently only disables BH when acquiring the
 *	sk_receive_queue lock.  Therefore it must not be used in a
 *	context where that lock is acquired in an IRQ context.
 *
 *	It returns 0 if the packet was removed by us.
 */

int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
	int err = __sk_queue_drop_skb(sk, &sk->sk_receive_queue, skb, flags,
				      NULL);

	kfree_skb(skb);
	return err;
}
EXPORT_SYMBOL(skb_kill_datagram);
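
/* Example: a sketch of a typical skb_kill_datagram() call site, assuming a
 * hypothetical example_csum_is_bad() helper that validates the payload only
 * after the skb has been received (UDP does the same thing on a checksum
 * failure).  The flags must be the ones passed to skb_recv_datagram() so
 * that a peeked skb is also unlinked from the receive queue:
 *
 *	skb = skb_recv_datagram(sk, flags, &err);
 *	if (!skb)
 *		return err;
 *	if (example_csum_is_bad(skb)) {
 *		skb_kill_datagram(sk, skb, flags);
 *		return -EAGAIN;
 *	}
 */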

INDIRECT_CALLABLE_DECLARE(static size_t simple_copy_to_iter(const void *addr,
						size_t bytes,
						void *data __always_unused,
						struct iov_iter *i));

static int __skb_datagram_iter(const struct sk_buff *skb, int offset,
			       struct iov_iter *to, int len, bool fault_short,
			       size_t (*cb)(const void *, size_t, void *,
					    struct iov_iter *), void *data)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset, start_off = offset, n;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		n = INDIRECT_CALL_1(cb, simple_copy_to_iter,
				    skb->data + offset, copy, data, to);
		offset += n;
		if (n != copy)
			goto short_copy;
		if ((len -= copy) == 0)
			return 0;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			struct page *page = skb_frag_page(frag);
			u8 *vaddr = kmap(page);

			if (copy > len)
				copy = len;
			n = INDIRECT_CALL_1(cb, simple_copy_to_iter,
					vaddr + skb_frag_off(frag) + offset - start,
					copy, data, to);
			kunmap(page);
			offset += n;
			if (n != copy)
				goto short_copy;
			if (!(len -= copy))
				return 0;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (__skb_datagram_iter(frag_iter, offset - start,
						to, copy, fault_short, cb, data))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

	/* This is not really a user copy fault, but rather someone
	 * gave us a bogus length on the skb.  We should probably
	 * print a warning here as it may indicate a kernel bug.
	 */

fault:
	iov_iter_revert(to, offset - start_off);
	return -EFAULT;

short_copy:
	if (fault_short || iov_iter_count(to))
		goto fault;

	return 0;
}

static size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
				    struct iov_iter *i)
{
#ifdef CONFIG_CRYPTO_HASH
	struct ahash_request *hash = hashp;
	struct scatterlist sg;
	size_t copied;

	copied = copy_to_iter(addr, bytes, i);
	sg_init_one(&sg, addr, copied);
	ahash_request_set_crypt(hash, &sg, NULL, copied);
	crypto_ahash_update(hash);
	return copied;
#else
	return 0;
#endif
}

/**
 *	skb_copy_and_hash_datagram_iter - Copy datagram to an iovec iterator
 *          and update a hash.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: iovec iterator to copy to
 *	@len: amount of data to copy from buffer to iovec
 *      @hash: hash request to update
 */
int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset,
			   struct iov_iter *to, int len,
			   struct ahash_request *hash)
{
	return __skb_datagram_iter(skb, offset, to, len, true,
			hash_and_copy_to_iter, hash);
}
EXPORT_SYMBOL(skb_copy_and_hash_datagram_iter);
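
/* Example: a sketch of how this helper is meant to be driven, assuming the
 * caller already owns an initialised ahash_request (req) for the digest it
 * wants and finalises the hash itself once the last chunk has been copied;
 * nvme-tcp uses it this way to fold its data digest into the receive copy:
 *
 *	err = skb_copy_and_hash_datagram_iter(skb, offset, &msg->msg_iter,
 *					      len, req);
 *	if (err)
 *		return err;
 */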

static size_t simple_copy_to_iter(const void *addr, size_t bytes,
		void *data __always_unused, struct iov_iter *i)
{
	return copy_to_iter(addr, bytes, i);
}

/**
 *	skb_copy_datagram_iter - Copy a datagram to an iovec iterator.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: iovec iterator to copy to
 *	@len: amount of data to copy from buffer to iovec
 */
int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
			   struct iov_iter *to, int len)
{
	trace_skb_copy_datagram_iovec(skb, len);
	return __skb_datagram_iter(skb, offset, to, len, false,
			simple_copy_to_iter, NULL);
}
EXPORT_SYMBOL(skb_copy_datagram_iter);
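
/* The msghdr-based wrapper most protocols call, skb_copy_datagram_msg(), is a
 * static inline in <linux/skbuff.h> that simply forwards the iterator here;
 * roughly:
 *
 *	static inline int skb_copy_datagram_msg(const struct sk_buff *from,
 *						int offset, struct msghdr *msg,
 *						int size)
 *	{
 *		return skb_copy_datagram_iter(from, offset, &msg->msg_iter, size);
 *	}
 */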

/**
 *	skb_copy_datagram_from_iter - Copy a datagram from an iov_iter.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying to
 *	@from: the copy source
 *	@len: amount of data to copy to buffer from iovec
 *
 *	Returns 0 or -EFAULT.
 */
int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
				 struct iov_iter *from,
				 int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (copy_from_iter(skb->data + offset, copy, from) != copy)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			size_t copied;

			if (copy > len)
				copy = len;
			copied = copy_page_from_iter(skb_frag_page(frag),
					  skb_frag_off(frag) + offset - start,
					  copy, from);
			if (copied != copy)
				goto fault;

			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_from_iter(frag_iter,
							offset - start,
							from, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_from_iter);
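
/* Example: a sketch of the transmit-side counterpart, assuming a hypothetical
 * sendmsg() path that places the whole payload in the linear area of a fresh
 * skb (hlen stands for whatever headroom the protocol wants to reserve):
 *
 *	skb = sock_alloc_send_skb(sk, hlen + len,
 *				  msg->msg_flags & MSG_DONTWAIT, &err);
 *	if (!skb)
 *		return err;
 *	skb_reserve(skb, hlen);
 *	skb_put(skb, len);
 *	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
 *	if (err) {
 *		kfree_skb(skb);
 *		return err;
 *	}
 */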

int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
			    struct sk_buff *skb, struct iov_iter *from,
			    size_t length)
{
	int frag;

	if (msg && msg->msg_ubuf && msg->sg_from_iter)
		return msg->sg_from_iter(sk, skb, from, length);

	frag = skb_shinfo(skb)->nr_frags;

	while (length && iov_iter_count(from)) {
		struct page *head, *last_head = NULL;
		struct page *pages[MAX_SKB_FRAGS];
		int refs, order, n = 0;
		size_t start;
		ssize_t copied;
		unsigned long truesize;

		if (frag == MAX_SKB_FRAGS)
			return -EMSGSIZE;

		copied = iov_iter_get_pages2(from, pages, length,
					    MAX_SKB_FRAGS - frag, &start);
		if (copied < 0)
			return -EFAULT;

		length -= copied;

		truesize = PAGE_ALIGN(copied + start);
		skb->data_len += copied;
		skb->len += copied;
		skb->truesize += truesize;
		if (sk && sk->sk_type == SOCK_STREAM) {
			sk_wmem_queued_add(sk, truesize);
			if (!skb_zcopy_pure(skb))
				sk_mem_charge(sk, truesize);
		} else {
			refcount_add(truesize, &skb->sk->sk_wmem_alloc);
		}

		head = compound_head(pages[n]);
		order = compound_order(head);

		for (refs = 0; copied != 0; start = 0) {
			int size = min_t(int, copied, PAGE_SIZE - start);

			if (pages[n] - head > (1UL << order) - 1) {
				head = compound_head(pages[n]);
				order = compound_order(head);
			}

			start += (pages[n] - head) << PAGE_SHIFT;
			copied -= size;
			n++;
			if (frag) {
				skb_frag_t *last = &skb_shinfo(skb)->frags[frag - 1];

				if (head == skb_frag_page(last) &&
				    start == skb_frag_off(last) + skb_frag_size(last)) {
					skb_frag_size_add(last, size);
					/* We combined this page, we need to release
					 * a reference. Since compound pages refcount
					 * is shared among many pages, batch the refcount
					 * adjustments to limit false sharing.
					 */
					last_head = head;
					refs++;
					continue;
				}
			}
			if (refs) {
				page_ref_sub(last_head, refs);
				refs = 0;
			}
			skb_fill_page_desc_noacc(skb, frag++, head, start, size);
		}
		if (refs)
			page_ref_sub(last_head, refs);
	}
	return 0;
}
EXPORT_SYMBOL(__zerocopy_sg_from_iter);

/**
 *	zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
 *	@skb: buffer to copy
 *	@from: the source to copy from
 *
 *	The function will first copy up to headlen, and then pin the userspace
 *	pages and build frags through them.
 *
 *	Returns 0, -EFAULT or -EMSGSIZE.
 */
int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
{
	int copy = min_t(int, skb_headlen(skb), iov_iter_count(from));

	/* copy up to skb headlen */
	if (skb_copy_datagram_from_iter(skb, 0, from, copy))
		return -EFAULT;

	return __zerocopy_sg_from_iter(NULL, NULL, skb, from, ~0U);
}
EXPORT_SYMBOL(zerocopy_sg_from_iter);
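
/* For the MSG_ZEROCOPY datagram path, skb_zerocopy_iter_dgram() in
 * net/core/skbuff.c is the usual entry point into __zerocopy_sg_from_iter();
 * roughly:
 *
 *	int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg,
 *				    int len)
 *	{
 *		return __zerocopy_sg_from_iter(msg, skb->sk, skb,
 *					       &msg->msg_iter, len);
 *	}
 */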

static __always_inline
size_t copy_to_user_iter_csum(void __user *iter_to, size_t progress,
			      size_t len, void *from, void *priv2)
{
	__wsum next, *csum = priv2;

	next = csum_and_copy_to_user(from + progress, iter_to, len);
	*csum = csum_block_add(*csum, next, progress);
	return next ? 0 : len;
}

static __always_inline
size_t memcpy_to_iter_csum(void *iter_to, size_t progress,
			   size_t len, void *from, void *priv2)
{
	__wsum *csum = priv2;
	__wsum next = csum_partial_copy_nocheck(from + progress, iter_to, len);

	*csum = csum_block_add(*csum, next, progress);
	return 0;
}

struct csum_state {
	__wsum csum;
	size_t off;
};

static size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
				    struct iov_iter *i)
{
	struct csum_state *csstate = _csstate;
	__wsum sum;

	if (WARN_ON_ONCE(i->data_source))
		return 0;
	if (unlikely(iov_iter_is_discard(i))) {
		// can't use csum_memcpy() for that one - data is not copied
		csstate->csum = csum_block_add(csstate->csum,
					       csum_partial(addr, bytes, 0),
					       csstate->off);
		csstate->off += bytes;
		return bytes;
	}

	sum = csum_shift(csstate->csum, csstate->off);

	bytes = iterate_and_advance2(i, bytes, (void *)addr, &sum,
				     copy_to_user_iter_csum,
				     memcpy_to_iter_csum);
	csstate->csum = csum_shift(sum, csstate->off);
	csstate->off += bytes;
	return bytes;
}

/**
 *	skb_copy_and_csum_datagram - Copy datagram to an iovec iterator
 *          and update a checksum.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: iovec iterator to copy to
 *	@len: amount of data to copy from buffer to iovec
 *      @csump: checksum pointer
 */
static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
				      struct iov_iter *to, int len,
				      __wsum *csump)
{
	struct csum_state csdata = { .csum = *csump };
	int ret;

	ret = __skb_datagram_iter(skb, offset, to, len, true,
				  csum_and_copy_to_iter, &csdata);
	if (ret)
		return ret;

	*csump = csdata.csum;
	return 0;
}

/**
 *	skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec.
 *	@skb: skbuff
 *	@hlen: hardware length
 *	@msg: destination
 *
 *	Caller _must_ check that skb will fit to this iovec.
 *
 *	Returns: 0       - success.
 *		 -EINVAL - checksum failure.
 *		 -EFAULT - fault during copy.
 */
int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
				   int hlen, struct msghdr *msg)
{
	__wsum csum;
	int chunk = skb->len - hlen;

	if (!chunk)
		return 0;

	if (msg_data_left(msg) < chunk) {
		if (__skb_checksum_complete(skb))
			return -EINVAL;
		if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
			goto fault;
	} else {
		csum = csum_partial(skb->data, hlen, skb->csum);
		if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter,
					       chunk, &csum))
			goto fault;

		if (csum_fold(csum)) {
			iov_iter_revert(&msg->msg_iter, chunk);
			return -EINVAL;
		}

		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
		    !skb->csum_complete_sw)
			netdev_rx_csum_fault(NULL, skb);
	}
	return 0;
fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
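
/* Example: roughly how UDP uses this helper when the checksum has not been
 * verified before the copy; hlen would be the transport header length (e.g.
 * sizeof(struct udphdr)) plus any peek offset, and csum_copy_err is a label
 * in the hypothetical caller that drops the datagram and retries:
 *
 *	err = skb_copy_and_csum_datagram_msg(skb, hlen, msg);
 *	if (err == -EINVAL)
 *		goto csum_copy_err;
 */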

/**
 * 	datagram_poll - generic datagram poll
 *	@file: file struct
 *	@sock: socket
 *	@wait: poll table
 *
 *	Datagram poll: Again totally generic. This also handles
 *	sequenced packet sockets providing the socket receive queue
 *	is only ever holding data ready to receive.
 *
 *	Note: when you *don't* use this routine for this protocol,
 *	and you use a different write policy from sock_writeable()
 *	then please supply your own write_space callback.
 */
__poll_t datagram_poll(struct file *file, struct socket *sock,
			   poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask;
	u8 shutdown;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (READ_ONCE(sk->sk_err) ||
	    !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	shutdown = READ_ONCE(sk->sk_shutdown);
	if (shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (connection_based(sk)) {
		int state = READ_ONCE(sk->sk_state);

		if (state == TCP_CLOSE)
			mask |= EPOLLHUP;
		/* connection hasn't started yet? */
		if (state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	if (sock_writeable(sk))
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
EXPORT_SYMBOL(datagram_poll);
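
/* Example: a sketch of how a protocol wires this helper up, assuming a
 * hypothetical example_dgram_ops (AppleTalk DDP and AX.25, among others, use
 * datagram_poll in exactly this way in their proto_ops):
 *
 *	static const struct proto_ops example_dgram_ops = {
 *		.family		= PF_EXAMPLE,
 *		.poll		= datagram_poll,
 *		.recvmsg	= example_recvmsg,
 *		...
 *	};
 */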