/*
 *     SUCS NET3:
 *
 *     Generic stream handling routines. These are generic for most
 *     protocols. Even IP. Tonight 8-).
 *     This is used because the TCP and LLC layers (and others too) all
 *     have mostly identical sendmsg() and recvmsg() code.
 *     So we (will) share it here.
 *
 *     Authors:        Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *                     (from old tcp.c code)
 *                     Alan Cox <alan@redhat.com> (Borrowed comments 8-))
 */

#include <linux/module.h>
#include <linux/net.h>
#include <linux/signal.h>
#include <linux/tcp.h>
#include <linux/wait.h>
#include <net/sock.h>

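/**
 * sk_stream_write_space - stream socket write_space callback
 * @sk: socket
 *
 * If enough write space has become available, clear SOCK_NOSPACE, wake up
 * any task sleeping on the socket and, unless sends have been shut down,
 * signal asynchronous waiters with POLL_OUT.
 */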
void sk_stream_write_space(struct sock *sk)
{
	struct socket *sock = sk->sk_socket;

	if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) {
		clear_bit(SOCK_NOSPACE, &sock->flags);

		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
			wake_up_interruptible(sk->sk_sleep);
		if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
			sock_wake_async(sock, 2, POLL_OUT);
	}
}

EXPORT_SYMBOL(sk_stream_write_space);

/**
 * sk_stream_wait_connect - Wait for a socket to get into the connected state
 * @sk: sock to wait on
 * @timeo_p: for how long to wait
 *
 * Must be called with the socket locked.
 */
int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
{
	struct task_struct *tsk = current;
	DEFINE_WAIT(wait);
	int done;

	do {
		int err = sock_error(sk);
		if (err)
			return err;
		if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV))
			return -EPIPE;
		if (!*timeo_p)
			return -EAGAIN;
		if (signal_pending(tsk))
			return sock_intr_errno(*timeo_p);

		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
		sk->sk_write_pending++;
		done = sk_wait_event(sk, timeo_p,
				     !sk->sk_err &&
				     !((1 << sk->sk_state) &
				       ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)));
		finish_wait(sk->sk_sleep, &wait);
		sk->sk_write_pending--;
	} while (!done);
	return 0;
}

EXPORT_SYMBOL(sk_stream_wait_connect);
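
/*
 * Illustrative caller (a sketch of the usual sendmsg() prologue, loosely
 * modelled on tcp_sendmsg(); the surrounding code and labels are
 * assumptions, not part of this file):
 *
 *	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
 *
 *	if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
 *		err = sk_stream_wait_connect(sk, &timeo);
 *		if (err != 0)
 *			goto out_err;
 *	}
 */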

/**
 * sk_stream_closing - Return 1 if we still have things to send in our buffers.
 * @sk: socket to verify
 */
static inline int sk_stream_closing(struct sock *sk)
{
	return (1 << sk->sk_state) &
	       (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK);
}

void sk_stream_wait_close(struct sock *sk, long timeout)
{
	if (timeout) {
		DEFINE_WAIT(wait);

		do {
			prepare_to_wait(sk->sk_sleep, &wait,
					TASK_INTERRUPTIBLE);
			if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk)))
				break;
		} while (!signal_pending(current) && timeout);

		finish_wait(sk->sk_sleep, &wait);
	}
}

EXPORT_SYMBOL(sk_stream_wait_close);

/**
 * sk_stream_wait_memory - Wait for more memory for a socket
 * @sk: socket to wait for memory
 * @timeo_p: for how long
 */
int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
{
	int err = 0;
	long vm_wait = 0;
	long current_timeo = *timeo_p;
	DEFINE_WAIT(wait);

	if (sk_stream_memory_free(sk))
		current_timeo = vm_wait = (net_random() % (HZ / 5)) + 2;

	while (1) {
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);

		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
			goto do_error;
		if (!*timeo_p)
			goto do_nonblock;
		if (signal_pending(current))
			goto do_interrupted;
		clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
		if (sk_stream_memory_free(sk) && !vm_wait)
			break;

		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		sk->sk_write_pending++;
		sk_wait_event(sk, &current_timeo, !sk->sk_err &&
						  !(sk->sk_shutdown & SEND_SHUTDOWN) &&
						  sk_stream_memory_free(sk) &&
						  vm_wait);
		sk->sk_write_pending--;

		if (vm_wait) {
			vm_wait -= current_timeo;
			current_timeo = *timeo_p;
			if (current_timeo != MAX_SCHEDULE_TIMEOUT &&
			    (current_timeo -= vm_wait) < 0)
				current_timeo = 0;
			vm_wait = 0;
		}
		*timeo_p = current_timeo;
	}
out:
	finish_wait(sk->sk_sleep, &wait);
	return err;

do_error:
	err = -EPIPE;
	goto out;
do_nonblock:
	err = -EAGAIN;
	goto out;
do_interrupted:
	err = sock_intr_errno(*timeo_p);
	goto out;
}

EXPORT_SYMBOL(sk_stream_wait_memory);
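
/*
 * Illustrative caller (a sketch of the usual sendmsg() copy loop, loosely
 * modelled on tcp_sendmsg(); the loop structure and labels are assumptions,
 * not part of this file):
 *
 *	while (there is user data left to queue) {
 *		if (!sk_stream_memory_free(sk))
 *			goto wait_for_sndbuf;
 *
 *		... allocate an skb and copy data into it ...
 *		continue;
 *
 *	wait_for_sndbuf:
 *		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 *		err = sk_stream_wait_memory(sk, &timeo);
 *		if (err != 0)
 *			goto do_error;
 *	}
 */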
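/**
 * sk_stream_rfree - receive buffer destructor for stream sockets
 * @skb: buffer being released
 *
 * Uncharge @skb from the owning socket's receive allocation and hand its
 * truesize back to the socket's forward allocation.
 */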
void sk_stream_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	skb_truesize_check(skb);
	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
	sk->sk_forward_alloc += skb->truesize;
}

EXPORT_SYMBOL(sk_stream_rfree);

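/**
 * sk_stream_error - map a send error for return to user space
 * @sk: socket the error occurred on
 * @flags: sendmsg() flags
 * @err: tentative error code
 *
 * Replace -EPIPE with any error pending on the socket; if the result is
 * still -EPIPE and MSG_NOSIGNAL was not set, raise SIGPIPE on the caller.
 */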
int sk_stream_error(struct sock *sk, int flags, int err)
{
	if (err == -EPIPE)
		err = sock_error(sk) ? : -EPIPE;
	if (err == -EPIPE && !(flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	return err;
}

EXPORT_SYMBOL(sk_stream_error);

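/**
 * __sk_stream_mem_reclaim - return forward-allocated memory to the protocol
 * @sk: socket to reclaim from
 *
 * Give whole quanta of @sk's forward allocation back to the protocol's
 * global memory_allocated counter, keeping only the sub-quantum remainder,
 * and leave memory pressure once usage drops below the low threshold.
 */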
void __sk_stream_mem_reclaim(struct sock *sk)
{
	atomic_sub(sk->sk_forward_alloc / SK_STREAM_MEM_QUANTUM,
		   sk->sk_prot->memory_allocated);
	sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1;
	if (*sk->sk_prot->memory_pressure &&
	    (atomic_read(sk->sk_prot->memory_allocated) <
	     sk->sk_prot->sysctl_mem[0]))
		*sk->sk_prot->memory_pressure = 0;
}

EXPORT_SYMBOL(__sk_stream_mem_reclaim);

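/**
 * sk_stream_mem_schedule - try to charge memory to a stream socket
 * @sk: socket being charged
 * @size: number of bytes to account for
 * @kind: non-zero for receive buffer space, zero for send buffer space
 *
 * Charge @size bytes, rounded up to whole quanta, against the protocol's
 * global memory limits.  Returns 1 if the allocation may proceed, 0 if it
 * must be suppressed; in the latter case the charge is undone before
 * returning.
 */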
int sk_stream_mem_schedule(struct sock *sk, int size, int kind)
{
	int amt = sk_stream_pages(size);

	sk->sk_forward_alloc += amt * SK_STREAM_MEM_QUANTUM;
	atomic_add(amt, sk->sk_prot->memory_allocated);

	/* Under limit. */
	if (atomic_read(sk->sk_prot->memory_allocated) < sk->sk_prot->sysctl_mem[0]) {
		if (*sk->sk_prot->memory_pressure)
			*sk->sk_prot->memory_pressure = 0;
		return 1;
	}

	/* Over hard limit. */
	if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[2]) {
		sk->sk_prot->enter_memory_pressure();
		goto suppress_allocation;
	}

	/* Under pressure. */
	if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[1])
		sk->sk_prot->enter_memory_pressure();

	if (kind) {
		if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_prot->sysctl_rmem[0])
			return 1;
	} else if (sk->sk_wmem_queued < sk->sk_prot->sysctl_wmem[0])
		return 1;

	if (!*sk->sk_prot->memory_pressure ||
	    sk->sk_prot->sysctl_mem[2] > atomic_read(sk->sk_prot->sockets_allocated) *
				sk_stream_pages(sk->sk_wmem_queued +
						atomic_read(&sk->sk_rmem_alloc) +
						sk->sk_forward_alloc))
		return 1;

suppress_allocation:

	if (!kind) {
		sk_stream_moderate_sndbuf(sk);

		/* Fail only if the socket is _under_ its sndbuf.
		 * In this case we cannot block, so we have to fail.
		 */
		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
			return 1;
	}

	/* Alas. Undo changes. */
	sk->sk_forward_alloc -= amt * SK_STREAM_MEM_QUANTUM;
	atomic_sub(amt, sk->sk_prot->memory_allocated);
	return 0;
}

EXPORT_SYMBOL(sk_stream_mem_schedule);

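/**
 * sk_stream_kill_queues - purge a dying stream socket's queues
 * @sk: socket being torn down
 *
 * Free everything still sitting in the receive and error queues and
 * reclaim the socket's forward-allocated memory.  The write queue is
 * expected to be empty by the time this is called.
 */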
void sk_stream_kill_queues(struct sock *sk)
{
	/* First the read buffer. */
	__skb_queue_purge(&sk->sk_receive_queue);

	/* Next, the error queue. */
	__skb_queue_purge(&sk->sk_error_queue);

	/* Next, the write queue. */
	BUG_TRAP(skb_queue_empty(&sk->sk_write_queue));

	/* Account for returned memory. */
	sk_stream_mem_reclaim(sk);

	BUG_TRAP(!sk->sk_wmem_queued);
	BUG_TRAP(!sk->sk_forward_alloc);

	/* It is _impossible_ for the backlog to contain anything
	 * when we get here.  All user references to this socket
	 * have gone away; only the net layer can touch it now.
	 */
}

EXPORT_SYMBOL(sk_stream_kill_queues);
