// SPDX-License-Identifier: GPL-2.0-or-later
/* RxRPC recvmsg() implementation
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/export.h>
#include <linux/sched/signal.h>

#include <net/sock.h>
#include <net/af_rxrpc.h>
#include "ar-internal.h"

/*
 * Post a call for attention by the socket or kernel service.  Further
 * notifications are suppressed by putting recvmsg_link on a dummy queue.
 */
void rxrpc_notify_socket(struct rxrpc_call *call)
{
	struct rxrpc_sock *rx;
	struct sock *sk;

	_enter("%d", call->debug_id);

	if (!list_empty(&call->recvmsg_link))
		return;

	rcu_read_lock();

	rx = rcu_dereference(call->socket);
	sk = &rx->sk;
	if (rx && sk->sk_state < RXRPC_CLOSE) {
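		/* Kernel services are notified through their registered
		 * callback; userspace sockets have the call queued for
		 * recvmsg() and are then woken via ->sk_data_ready().
		 */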
		if (call->notify_rx) {
			spin_lock(&call->notify_lock);
			call->notify_rx(sk, call, call->user_call_ID);
			spin_unlock(&call->notify_lock);
		} else {
			spin_lock(&rx->recvmsg_lock);
			if (list_empty(&call->recvmsg_link)) {
				rxrpc_get_call(call, rxrpc_call_get_notify_socket);
				list_add_tail(&call->recvmsg_link, &rx->recvmsg_q);
			}
			spin_unlock(&rx->recvmsg_lock);

			if (!sock_flag(sk, SOCK_DEAD)) {
				_debug("call %ps", sk->sk_data_ready);
				sk->sk_data_ready(sk);
			}
		}
	}

	rcu_read_unlock();
	_leave("");
}

/*
 * Pass a call terminating message to userspace.
 */
static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg)
{
	u32 tmp = 0;
	int ret;

	switch (call->completion) {
	case RXRPC_CALL_SUCCEEDED:
		ret = 0;
		if (rxrpc_is_service_call(call))
			ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &tmp);
		break;
	case RXRPC_CALL_REMOTELY_ABORTED:
		tmp = call->abort_code;
		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp);
		break;
	case RXRPC_CALL_LOCALLY_ABORTED:
		tmp = call->abort_code;
		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp);
		break;
	case RXRPC_CALL_NETWORK_ERROR:
		tmp = -call->error;
		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &tmp);
		break;
	case RXRPC_CALL_LOCAL_ERROR:
		tmp = -call->error;
		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, &tmp);
		break;
	default:
		pr_err("Invalid terminal call state %u\n", call->completion);
		BUG();
		break;
	}

	trace_rxrpc_recvdata(call, rxrpc_recvmsg_terminal,
			     call->ackr_window - 1,
			     call->rx_pkt_offset, call->rx_pkt_len, ret);
	return ret;
}

/*
 * Discard a packet we've used up and advance the Rx window by one.
 */
static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
{
	struct rxrpc_skb_priv *sp;
	struct sk_buff *skb;
	rxrpc_serial_t serial;
	rxrpc_seq_t old_consumed = call->rx_consumed, tseq;
	bool last;
	int acked;

	_enter("%d", call->debug_id);

	skb = skb_dequeue(&call->recvmsg_queue);
	rxrpc_see_skb(skb, rxrpc_skb_see_rotate);

	sp = rxrpc_skb(skb);
	tseq   = sp->hdr.seq;
	serial = sp->hdr.serial;
	last   = sp->hdr.flags & RXRPC_LAST_PACKET;

	/* Barrier against rxrpc_input_data(). */
	if (after(tseq, call->rx_consumed))
		smp_store_release(&call->rx_consumed, tseq);

	rxrpc_free_skb(skb, rxrpc_skb_put_rotate);

	trace_rxrpc_receive(call, last ? rxrpc_receive_rotate_last : rxrpc_receive_rotate,
			    serial, call->rx_consumed);

	if (last)
		set_bit(RXRPC_CALL_RECVMSG_READ_ALL, &call->flags);

	/* Check to see if there's an ACK that needs sending. */
	acked = atomic_add_return(call->rx_consumed - old_consumed,
				  &call->ackr_nr_consumed);
	if (acked > 8 &&
	    !test_and_set_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags))
		rxrpc_poke_call(call, rxrpc_call_poke_idle);
}

/*
 * Decrypt and verify a DATA packet.
 */
static int rxrpc_verify_data(struct rxrpc_call *call, struct sk_buff *skb)
{
	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);

	if (sp->flags & RXRPC_RX_VERIFIED)
		return 0;
	return call->security->verify_packet(call, skb);
}

/*
 * Deliver messages to a call.  This keeps processing packets until the buffer
 * is filled and we find either more DATA (returns 0) or the end of the DATA
 * (returns 1).  If more packets are required, it returns -EAGAIN and if the
 * call has failed it returns -EIO.
 */
static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
			      struct msghdr *msg, struct iov_iter *iter,
			      size_t len, int flags, size_t *_offset)
{
	struct rxrpc_skb_priv *sp;
	struct sk_buff *skb;
	rxrpc_seq_t seq = 0;
	size_t remain;
	unsigned int rx_pkt_offset, rx_pkt_len;
	int copy, ret = -EAGAIN, ret2;

	rx_pkt_offset = call->rx_pkt_offset;
	rx_pkt_len = call->rx_pkt_len;

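	/* Deal with states that can't yield any more data before we look at
	 * the queue: a failed call gives -EIO and a fully-read call gives 1.
	 */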
	if (rxrpc_call_has_failed(call)) {
		seq = call->ackr_window - 1;
		ret = -EIO;
		goto done;
	}

	if (test_bit(RXRPC_CALL_RECVMSG_READ_ALL, &call->flags)) {
		seq = call->ackr_window - 1;
		ret = 1;
		goto done;
	}

	/* No one else can be removing stuff from the queue, so we shouldn't
	 * need the Rx lock to walk it.
	 */
	skb = skb_peek(&call->recvmsg_queue);
	while (skb) {
		rxrpc_see_skb(skb, rxrpc_skb_see_recvmsg);
		sp = rxrpc_skb(skb);
		seq = sp->hdr.seq;

		if (!(flags & MSG_PEEK))
			trace_rxrpc_receive(call, rxrpc_receive_front,
					    sp->hdr.serial, seq);

		if (msg)
			sock_recv_timestamp(msg, sock->sk, skb);

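		/* An offset of zero means we haven't started on this packet
		 * yet, so decrypt and verify it before copying anything out.
		 */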
		if (rx_pkt_offset == 0) {
			ret2 = rxrpc_verify_data(call, skb);
			trace_rxrpc_recvdata(call, rxrpc_recvmsg_next, seq,
					     sp->offset, sp->len, ret2);
			if (ret2 < 0) {
				kdebug("verify = %d", ret2);
				ret = ret2;
				goto out;
			}
			rx_pkt_offset = sp->offset;
			rx_pkt_len = sp->len;
		} else {
			trace_rxrpc_recvdata(call, rxrpc_recvmsg_cont, seq,
					     rx_pkt_offset, rx_pkt_len, 0);
		}

		/* We have to handle short, empty and used-up DATA packets. */
		remain = len - *_offset;
		copy = rx_pkt_len;
		if (copy > remain)
			copy = remain;
		if (copy > 0) {
			ret2 = skb_copy_datagram_iter(skb, rx_pkt_offset, iter,
						      copy);
			if (ret2 < 0) {
				ret = ret2;
				goto out;
			}

			/* handle piecemeal consumption of data packets */
			rx_pkt_offset += copy;
			rx_pkt_len -= copy;
			*_offset += copy;
		}

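		/* If payload remains in this packet, the caller's buffer must
		 * be full; stop and remember how far into the packet we got.
		 */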
		if (rx_pkt_len > 0) {
			trace_rxrpc_recvdata(call, rxrpc_recvmsg_full, seq,
					     rx_pkt_offset, rx_pkt_len, 0);
			ASSERTCMP(*_offset, ==, len);
			ret = 0;
			break;
		}

		/* The whole packet has been transferred. */
		if (sp->hdr.flags & RXRPC_LAST_PACKET)
			ret = 1;
		rx_pkt_offset = 0;
		rx_pkt_len = 0;

		skb = skb_peek_next(skb, &call->recvmsg_queue);

		if (!(flags & MSG_PEEK))
			rxrpc_rotate_rx_window(call);
	}

out:
	if (!(flags & MSG_PEEK)) {
		call->rx_pkt_offset = rx_pkt_offset;
		call->rx_pkt_len = rx_pkt_len;
	}
done:
	trace_rxrpc_recvdata(call, rxrpc_recvmsg_data_return, seq,
			     rx_pkt_offset, rx_pkt_len, ret);
	if (ret == -EAGAIN)
		set_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags);
	return ret;
}

/*
 * Receive a message from an RxRPC socket
 * - we need to be careful about two or more threads calling recvmsg
 *   simultaneously
 */
int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
		  int flags)
{
	struct rxrpc_call *call;
	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
	struct list_head *l;
	unsigned int call_debug_id = 0;
	size_t copied = 0;
	long timeo;
	int ret;

	DEFINE_WAIT(wait);

	trace_rxrpc_recvmsg(0, rxrpc_recvmsg_enter, 0);

	if (flags & (MSG_OOB | MSG_TRUNC))
		return -EOPNOTSUPP;

	timeo = sock_rcvtimeo(&rx->sk, flags & MSG_DONTWAIT);

try_again:
	lock_sock(&rx->sk);

	/* Return immediately if a client socket has no outstanding calls */
	if (RB_EMPTY_ROOT(&rx->calls) &&
	    list_empty(&rx->recvmsg_q) &&
	    rx->sk.sk_state != RXRPC_SERVER_LISTENING) {
		release_sock(&rx->sk);
		return -EAGAIN;
	}

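	/* Nothing is currently awaiting recvmsg: either fail immediately or
	 * sleep until a call is posted to the queue or we get interrupted.
	 */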
	if (list_empty(&rx->recvmsg_q)) {
		ret = -EWOULDBLOCK;
		if (timeo == 0) {
			call = NULL;
			goto error_no_call;
		}

		release_sock(&rx->sk);

		/* Wait for something to happen */
		prepare_to_wait_exclusive(sk_sleep(&rx->sk), &wait,
					  TASK_INTERRUPTIBLE);
		ret = sock_error(&rx->sk);
		if (ret)
			goto wait_error;

		if (list_empty(&rx->recvmsg_q)) {
			if (signal_pending(current))
				goto wait_interrupted;
			trace_rxrpc_recvmsg(0, rxrpc_recvmsg_wait, 0);
			timeo = schedule_timeout(timeo);
		}
		finish_wait(sk_sleep(&rx->sk), &wait);
		goto try_again;
	}

	/* Find the next call and dequeue it if we're not just peeking.  If we
	 * do dequeue it, that comes with a ref that we will need to release.
	 * We also want to weed out calls that got requeued whilst we were
	 * shovelling data out.
	 */
	spin_lock(&rx->recvmsg_lock);
	l = rx->recvmsg_q.next;
	call = list_entry(l, struct rxrpc_call, recvmsg_link);

	if (!rxrpc_call_is_complete(call) &&
	    skb_queue_empty(&call->recvmsg_queue)) {
		list_del_init(&call->recvmsg_link);
		spin_unlock(&rx->recvmsg_lock);
		release_sock(&rx->sk);
		trace_rxrpc_recvmsg(call->debug_id, rxrpc_recvmsg_unqueue, 0);
		rxrpc_put_call(call, rxrpc_call_put_recvmsg);
		goto try_again;
	}

	if (!(flags & MSG_PEEK))
		list_del_init(&call->recvmsg_link);
	else
		rxrpc_get_call(call, rxrpc_call_get_recvmsg);
	spin_unlock(&rx->recvmsg_lock);

	call_debug_id = call->debug_id;
	trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_dequeue, 0);

	/* We're going to drop the socket lock, so we need to lock the call
	 * against interference by sendmsg.
	 */
	if (!mutex_trylock(&call->user_mutex)) {
		ret = -EWOULDBLOCK;
		if (flags & MSG_DONTWAIT)
			goto error_requeue_call;
		ret = -ERESTARTSYS;
		if (mutex_lock_interruptible(&call->user_mutex) < 0)
			goto error_requeue_call;
	}

	release_sock(&rx->sk);

	if (test_bit(RXRPC_CALL_RELEASED, &call->flags))
		BUG();

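	/* Tell the recipient which call this is.  Compat callers get the user
	 * call ID as a 32-bit value, native callers as an unsigned long.
	 */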
	if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
		if (flags & MSG_CMSG_COMPAT) {
			unsigned int id32 = call->user_call_ID;

			ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
				       sizeof(unsigned int), &id32);
		} else {
			unsigned long idl = call->user_call_ID;

			ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
				       sizeof(unsigned long), &idl);
		}
		if (ret < 0)
			goto error_unlock_call;
	}

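	/* Supply the remote address if the caller gave us somewhere to put it. */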
	if (msg->msg_name && call->peer) {
		size_t len = sizeof(call->dest_srx);

		memcpy(msg->msg_name, &call->dest_srx, len);
		msg->msg_namelen = len;
	}

	ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len,
				 flags, &copied);
	if (ret == -EAGAIN)
		ret = 0;
	if (ret == -EIO)
		goto call_failed;
	if (ret < 0)
		goto error_unlock_call;

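	/* A completed call with a drained queue gets its terminal message
	 * now; a failed call has its remaining packets purged first.
	 * Otherwise, if data is still queued, renotify the socket so that the
	 * next recvmsg() picks it up.
	 */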
	if (rxrpc_call_is_complete(call) &&
	    skb_queue_empty(&call->recvmsg_queue))
		goto call_complete;
	if (rxrpc_call_has_failed(call))
		goto call_failed;

	if (!skb_queue_empty(&call->recvmsg_queue))
		rxrpc_notify_socket(call);
	goto not_yet_complete;

call_failed:
	rxrpc_purge_queue(&call->recvmsg_queue);
call_complete:
	ret = rxrpc_recvmsg_term(call, msg);
	if (ret < 0)
		goto error_unlock_call;
	if (!(flags & MSG_PEEK))
		rxrpc_release_call(rx, call);
	msg->msg_flags |= MSG_EOR;
	ret = 1;

not_yet_complete:
	if (ret == 0)
		msg->msg_flags |= MSG_MORE;
	else
		msg->msg_flags &= ~MSG_MORE;
	ret = copied;

error_unlock_call:
	mutex_unlock(&call->user_mutex);
	rxrpc_put_call(call, rxrpc_call_put_recvmsg);
	trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_return, ret);
	return ret;

error_requeue_call:
	if (!(flags & MSG_PEEK)) {
		spin_lock(&rx->recvmsg_lock);
		list_add(&call->recvmsg_link, &rx->recvmsg_q);
		spin_unlock(&rx->recvmsg_lock);
		trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_requeue, 0);
	} else {
		rxrpc_put_call(call, rxrpc_call_put_recvmsg);
	}
error_no_call:
	release_sock(&rx->sk);
error_trace:
	trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_return, ret);
	return ret;

wait_interrupted:
	ret = sock_intr_errno(timeo);
wait_error:
	finish_wait(sk_sleep(&rx->sk), &wait);
	call = NULL;
	goto error_trace;
}

/**
 * rxrpc_kernel_recv_data - Allow a kernel service to receive data/info
 * @sock: The socket that the call exists on
 * @call: The call to send data through
 * @iter: The buffer to receive into
 * @_len: The amount of data we want to receive (decreased on return)
 * @want_more: True if more data is expected to be read
 * @_abort: Where the abort code is stored if -ECONNABORTED is returned
 * @_service: Where to store the actual service ID (may be upgraded)
 *
 * Allow a kernel service to receive data and pick up information about the
 * state of a call.  Returns 0 if got what was asked for and there's more
 * available, 1 if we got what was asked for and we're at the end of the data
 * and -EAGAIN if we need more data.
 *
 * Note that we may return -EAGAIN to drain empty packets at the end of the
 * data, even if we've already copied over the requested data.
 *
 * *_abort should also be initialised to 0.
 */
int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
			   struct iov_iter *iter, size_t *_len,
			   bool want_more, u32 *_abort, u16 *_service)
{
	size_t offset = 0;
	int ret;

	_enter("{%d},%zu,%d", call->debug_id, *_len, want_more);

	mutex_lock(&call->user_mutex);

	ret = rxrpc_recvmsg_data(sock, call, NULL, iter, *_len, 0, &offset);
	*_len -= offset;
	if (ret == -EIO)
		goto call_failed;
	if (ret < 0)
		goto out;

	/* We can only reach here with a partially full buffer if we have
	 * reached the end of the data.  We must otherwise have a full buffer
	 * or have been given -EAGAIN.
	 */
	if (ret == 1) {
		if (iov_iter_count(iter) > 0)
			goto short_data;
		if (!want_more)
			goto read_phase_complete;
		ret = 0;
		goto out;
	}

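	/* ret == 0 here: the buffer filled but the data phase isn't over,
	 * which is only valid if the caller said they wanted more.
	 */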
	if (!want_more)
		goto excess_data;
	goto out;

read_phase_complete:
	ret = 1;
out:
	if (_service)
		*_service = call->dest_srx.srx_service;
	mutex_unlock(&call->user_mutex);
	_leave(" = %d [%zu,%d]", ret, iov_iter_count(iter), *_abort);
	return ret;

short_data:
	trace_rxrpc_abort(call->debug_id, rxrpc_recvmsg_short_data,
			  call->cid, call->call_id, call->rx_consumed,
			  0, -EBADMSG);
	ret = -EBADMSG;
	goto out;
excess_data:
	trace_rxrpc_abort(call->debug_id, rxrpc_recvmsg_excess_data,
			  call->cid, call->call_id, call->rx_consumed,
			  0, -EMSGSIZE);
	ret = -EMSGSIZE;
	goto out;
call_failed:
	*_abort = call->abort_code;
	ret = call->error;
	if (call->completion == RXRPC_CALL_SUCCEEDED) {
		ret = 1;
		if (iov_iter_count(iter) > 0)
			ret = -ECONNRESET;
	}
	goto out;
}
EXPORT_SYMBOL(rxrpc_kernel_recv_data);

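/*
 * A rough sketch of how a kernel service might drive the receive side through
 * rxrpc_kernel_recv_data().  The socket/call setup, the buffer and the names
 * used here are purely illustrative rather than taken from any real caller;
 * it merely assumes a kvec-backed ITER_DEST iterator:
 *
 *	struct kvec kv = { .iov_base = buf, .iov_len = sizeof(buf) };
 *	struct iov_iter iter;
 *	size_t len = sizeof(buf);
 *	u32 abort_code = 0;
 *	int ret;
 *
 *	iov_iter_kvec(&iter, ITER_DEST, &kv, 1, len);
 *	ret = rxrpc_kernel_recv_data(sock, call, &iter, &len, true,
 *				     &abort_code, NULL);
 *
 * ret == 0 means the buffer was filled and more data is to come (call again),
 * ret == 1 means the data phase has ended, -EAGAIN means nothing is queued
 * yet, and -ECONNABORTED means the call was aborted with the abort code left
 * in abort_code.
 */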