• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /asuswrt-rt-n18u-9.0.0.4.380.2695/release/src-rt-6.x.4708/linux/linux-2.6.36/net/tipc/
1/*
2 * net/tipc/socket.c: TIPC socket API
3 *
4 * Copyright (c) 2001-2007, Ericsson AB
5 * Copyright (c) 2004-2008, Wind River Systems
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the names of the copyright holders nor the names of its
17 *    contributors may be used to endorse or promote products derived from
18 *    this software without specific prior written permission.
19 *
20 * Alternatively, this software may be distributed under the terms of the
21 * GNU General Public License ("GPL") version 2 as published by the Free
22 * Software Foundation.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36
37#include <linux/module.h>
38#include <linux/types.h>
39#include <linux/net.h>
40#include <linux/socket.h>
41#include <linux/errno.h>
42#include <linux/mm.h>
43#include <linux/poll.h>
44#include <linux/fcntl.h>
45#include <linux/gfp.h>
46#include <asm/string.h>
47#include <asm/atomic.h>
48#include <net/sock.h>
49
50#include <linux/tipc.h>
51#include <linux/tipc_config.h>
52#include <net/tipc/tipc_msg.h>
53#include <net/tipc/tipc_port.h>
54
55#include "core.h"
56
/*
 * TIPC-private values stored in sock->state, in addition to the generic
 * socket_state values (SS_UNCONNECTED, SS_CONNECTING, ...).
 */
#define SS_LISTENING	-1	/* socket is listening */
#define SS_READY	-2	/* socket is connectionless */

/* NOTE(review): not referenced in the visible part of this file */
#define OVERLOAD_LIMIT_BASE	5000
/* Converted with msecs_to_jiffies() to seed sk_rcvtimeo in tipc_create() */
#define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout = 8s */

/**
 * struct tipc_sock - TIPC socket structure
 * @sk: generic socket; must stay the first member because tipc_sk() casts
 *      a struct sock pointer directly to a struct tipc_sock pointer
 * @p: TIPC port used by the socket (assigned in tipc_create())
 * @peer_name: port ID of the connection peer (recorded by auto_connect())
 */
struct tipc_sock {
	struct sock sk;
	struct tipc_port *p;
	struct tipc_portid peer_name;
};

/* Convert a struct sock pointer to its enclosing tipc_sock / to its port */
#define tipc_sk(sk) ((struct tipc_sock *)(sk))
#define tipc_sk_port(sk) ((struct tipc_port *)(tipc_sk(sk)->p))

/* Callbacks installed by tipc_create(): socket backlog handler and the
 * two port callbacks handed to tipc_createport_raw()
 */
static int backlog_rcv(struct sock *sk, struct sk_buff *skb);
static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf);
static void wakeupdispatch(struct tipc_port *tport);

/* Operation tables selected by socket type in tipc_create() */
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
static const struct proto_ops msg_ops;

/* Protocol descriptor passed to sk_alloc() */
static struct proto tipc_proto;

/* NOTE(review): not referenced in the visible part of this file */
static int sockets_enabled = 0;

/* Decremented each time a buffer is removed from a socket receive queue */
static atomic_t tipc_queue_size = ATOMIC_INIT(0);
85
86/*
87 * Revised TIPC socket locking policy:
88 *
89 * Most socket operations take the standard socket lock when they start
90 * and hold it until they finish (or until they need to sleep).  Acquiring
91 * this lock grants the owner exclusive access to the fields of the socket
92 * data structures, with the exception of the backlog queue.  A few socket
93 * operations can be done without taking the socket lock because they only
94 * read socket information that never changes during the life of the socket.
95 *
96 * Socket operations may acquire the lock for the associated TIPC port if they
97 * need to perform an operation on the port.  If any routine needs to acquire
98 * both the socket lock and the port lock it must take the socket lock first
99 * to avoid the risk of deadlock.
100 *
101 * The dispatcher handling incoming messages cannot grab the socket lock in
102 * the standard fashion, since it is invoked at the BH level and cannot block.
103 * Instead, it checks to see if the socket lock is currently owned by someone,
104 * and either handles the message itself or adds it to the socket's backlog
105 * queue; in the latter case the queued message is processed once the process
106 * owning the socket lock releases it.
107 *
108 * NOTE: Releasing the socket lock while an operation is sleeping overcomes
109 * the problem of a blocked socket operation preventing any other operations
110 * from occurring.  However, applications must be careful if they have
111 * multiple threads trying to send (or receive) on the same socket, as these
112 * operations might interfere with each other.  For example, doing a connect
113 * and a receive at the same time might allow the receive to consume the
114 * ACK message meant for the connect.  While additional work could be done
115 * to try and overcome this, it doesn't seem to be worthwhile at the present.
116 *
117 * NOTE: Releasing the socket lock while an operation is sleeping also ensures
118 * that another operation that must be performed in a non-blocking manner is
119 * not delayed for very long because the lock has already been taken.
120 *
121 * NOTE: This code assumes that certain fields of a port/socket pair are
122 * constant over its lifetime; such fields can be examined without taking
123 * the socket lock and/or port lock, and do not need to be re-read even
124 * after resuming processing after waiting.  These fields include:
125 *   - socket type
126 *   - pointer to socket sk structure (aka tipc_sock structure)
127 *   - pointer to port structure
128 *   - port reference
129 */
130
131/**
132 * advance_rx_queue - discard first buffer in socket receive queue
133 *
134 * Caller must hold socket lock
135 */
136
137static void advance_rx_queue(struct sock *sk)
138{
139	buf_discard(__skb_dequeue(&sk->sk_receive_queue));
140	atomic_dec(&tipc_queue_size);
141}
142
143/**
144 * discard_rx_queue - discard all buffers in socket receive queue
145 *
146 * Caller must hold socket lock
147 */
148
149static void discard_rx_queue(struct sock *sk)
150{
151	struct sk_buff *buf;
152
153	while ((buf = __skb_dequeue(&sk->sk_receive_queue))) {
154		atomic_dec(&tipc_queue_size);
155		buf_discard(buf);
156	}
157}
158
159/**
160 * reject_rx_queue - reject all buffers in socket receive queue
161 *
162 * Caller must hold socket lock
163 */
164
165static void reject_rx_queue(struct sock *sk)
166{
167	struct sk_buff *buf;
168
169	while ((buf = __skb_dequeue(&sk->sk_receive_queue))) {
170		tipc_reject_msg(buf, TIPC_ERR_NO_PORT);
171		atomic_dec(&tipc_queue_size);
172	}
173}
174
175/**
176 * tipc_create - create a TIPC socket
177 * @net: network namespace (must be default network)
178 * @sock: pre-allocated socket structure
179 * @protocol: protocol indicator (must be 0)
180 * @kern: caused by kernel or by userspace?
181 *
182 * This routine creates additional data structures used by the TIPC socket,
183 * initializes them, and links them together.
184 *
185 * Returns 0 on success, errno otherwise
186 */
187
188static int tipc_create(struct net *net, struct socket *sock, int protocol,
189		       int kern)
190{
191	const struct proto_ops *ops;
192	socket_state state;
193	struct sock *sk;
194	struct tipc_port *tp_ptr;
195
196	/* Validate arguments */
197
198	if (!net_eq(net, &init_net))
199		return -EAFNOSUPPORT;
200
201	if (unlikely(protocol != 0))
202		return -EPROTONOSUPPORT;
203
204	switch (sock->type) {
205	case SOCK_STREAM:
206		ops = &stream_ops;
207		state = SS_UNCONNECTED;
208		break;
209	case SOCK_SEQPACKET:
210		ops = &packet_ops;
211		state = SS_UNCONNECTED;
212		break;
213	case SOCK_DGRAM:
214	case SOCK_RDM:
215		ops = &msg_ops;
216		state = SS_READY;
217		break;
218	default:
219		return -EPROTOTYPE;
220	}
221
222	/* Allocate socket's protocol area */
223
224	sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto);
225	if (sk == NULL)
226		return -ENOMEM;
227
228	/* Allocate TIPC port for socket to use */
229
230	tp_ptr = tipc_createport_raw(sk, &dispatch, &wakeupdispatch,
231				     TIPC_LOW_IMPORTANCE);
232	if (unlikely(!tp_ptr)) {
233		sk_free(sk);
234		return -ENOMEM;
235	}
236
237	/* Finish initializing socket data structures */
238
239	sock->ops = ops;
240	sock->state = state;
241
242	sock_init_data(sock, sk);
243	sk->sk_rcvtimeo = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT);
244	sk->sk_backlog_rcv = backlog_rcv;
245	tipc_sk(sk)->p = tp_ptr;
246
247	spin_unlock_bh(tp_ptr->lock);
248
249	if (sock->state == SS_READY) {
250		tipc_set_portunreturnable(tp_ptr->ref, 1);
251		if (sock->type == SOCK_DGRAM)
252			tipc_set_portunreliable(tp_ptr->ref, 1);
253	}
254
255	atomic_inc(&tipc_user_count);
256	return 0;
257}
258
259/**
260 * release - destroy a TIPC socket
261 * @sock: socket to destroy
262 *
263 * This routine cleans up any messages that are still queued on the socket.
264 * For DGRAM and RDM socket types, all queued messages are rejected.
265 * For SEQPACKET and STREAM socket types, the first message is rejected
266 * and any others are discarded.  (If the first message on a STREAM socket
267 * is partially-read, it is discarded and the next one is rejected instead.)
268 *
269 * NOTE: Rejected messages are not necessarily returned to the sender!  They
270 * are returned or discarded according to the "destination droppable" setting
271 * specified for the message by the sender.
272 *
273 * Returns 0 on success, errno otherwise
274 */
275
276static int release(struct socket *sock)
277{
278	struct sock *sk = sock->sk;
279	struct tipc_port *tport;
280	struct sk_buff *buf;
281	int res;
282
283	/*
284	 * Exit if socket isn't fully initialized (occurs when a failed accept()
285	 * releases a pre-allocated child socket that was never used)
286	 */
287
288	if (sk == NULL)
289		return 0;
290
291	tport = tipc_sk_port(sk);
292	lock_sock(sk);
293
294	/*
295	 * Reject all unreceived messages, except on an active connection
296	 * (which disconnects locally & sends a 'FIN+' to peer)
297	 */
298
299	while (sock->state != SS_DISCONNECTING) {
300		buf = __skb_dequeue(&sk->sk_receive_queue);
301		if (buf == NULL)
302			break;
303		atomic_dec(&tipc_queue_size);
304		if (TIPC_SKB_CB(buf)->handle != msg_data(buf_msg(buf)))
305			buf_discard(buf);
306		else {
307			if ((sock->state == SS_CONNECTING) ||
308			    (sock->state == SS_CONNECTED)) {
309				sock->state = SS_DISCONNECTING;
310				tipc_disconnect(tport->ref);
311			}
312			tipc_reject_msg(buf, TIPC_ERR_NO_PORT);
313		}
314	}
315
316	/*
317	 * Delete TIPC port; this ensures no more messages are queued
318	 * (also disconnects an active connection & sends a 'FIN-' to peer)
319	 */
320
321	res = tipc_deleteport(tport->ref);
322
323	/* Discard any remaining (connection-based) messages in receive queue */
324
325	discard_rx_queue(sk);
326
327	/* Reject any messages that accumulated in backlog queue */
328
329	sock->state = SS_DISCONNECTING;
330	release_sock(sk);
331
332	sock_put(sk);
333	sock->sk = NULL;
334
335	atomic_dec(&tipc_user_count);
336	return res;
337}
338
339/**
340 * bind - associate or disassocate TIPC name(s) with a socket
341 * @sock: socket structure
342 * @uaddr: socket address describing name(s) and desired operation
343 * @uaddr_len: size of socket address data structure
344 *
345 * Name and name sequence binding is indicated using a positive scope value;
346 * a negative scope value unbinds the specified name.  Specifying no name
347 * (i.e. a socket address length of 0) unbinds all names from the socket.
348 *
349 * Returns 0 on success, errno otherwise
350 *
351 * NOTE: This routine doesn't need to take the socket lock since it doesn't
352 *       access any non-constant socket information.
353 */
354
355static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len)
356{
357	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
358	u32 portref = tipc_sk_port(sock->sk)->ref;
359
360	if (unlikely(!uaddr_len))
361		return tipc_withdraw(portref, 0, NULL);
362
363	if (uaddr_len < sizeof(struct sockaddr_tipc))
364		return -EINVAL;
365	if (addr->family != AF_TIPC)
366		return -EAFNOSUPPORT;
367
368	if (addr->addrtype == TIPC_ADDR_NAME)
369		addr->addr.nameseq.upper = addr->addr.nameseq.lower;
370	else if (addr->addrtype != TIPC_ADDR_NAMESEQ)
371		return -EAFNOSUPPORT;
372
373	return (addr->scope > 0) ?
374		tipc_publish(portref, addr->scope, &addr->addr.nameseq) :
375		tipc_withdraw(portref, -addr->scope, &addr->addr.nameseq);
376}
377
378/**
379 * get_name - get port ID of socket or peer socket
380 * @sock: socket structure
381 * @uaddr: area for returned socket address
382 * @uaddr_len: area for returned length of socket address
383 * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID
384 *
385 * Returns 0 on success, errno otherwise
386 *
387 * NOTE: This routine doesn't need to take the socket lock since it only
388 *       accesses socket information that is unchanging (or which changes in
389 * 	 a completely predictable manner).
390 */
391
392static int get_name(struct socket *sock, struct sockaddr *uaddr,
393		    int *uaddr_len, int peer)
394{
395	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
396	struct tipc_sock *tsock = tipc_sk(sock->sk);
397
398	if (peer) {
399		if ((sock->state != SS_CONNECTED) &&
400			((peer != 2) || (sock->state != SS_DISCONNECTING)))
401			return -ENOTCONN;
402		addr->addr.id.ref = tsock->peer_name.ref;
403		addr->addr.id.node = tsock->peer_name.node;
404	} else {
405		tipc_ownidentity(tsock->p->ref, &addr->addr.id);
406	}
407
408	*uaddr_len = sizeof(*addr);
409	addr->addrtype = TIPC_ADDR_ID;
410	addr->family = AF_TIPC;
411	addr->scope = 0;
412	addr->addr.name.domain = 0;
413
414	return 0;
415}
416
417/**
418 * poll - read and possibly block on pollmask
419 * @file: file structure associated with the socket
420 * @sock: socket for which to calculate the poll bits
421 * @wait: ???
422 *
423 * Returns pollmask value
424 *
425 * COMMENTARY:
426 * It appears that the usual socket locking mechanisms are not useful here
427 * since the pollmask info is potentially out-of-date the moment this routine
428 * exits.  TCP and other protocols seem to rely on higher level poll routines
429 * to handle any preventable race conditions, so TIPC will do the same ...
430 *
431 * TIPC sets the returned events as follows:
432 * a) POLLRDNORM and POLLIN are set if the socket's receive queue is non-empty
433 *    or if a connection-oriented socket is does not have an active connection
434 *    (i.e. a read operation will not block).
435 * b) POLLOUT is set except when a socket's connection has been terminated
436 *    (i.e. a write operation will not block).
437 * c) POLLHUP is set when a socket's connection has been terminated.
438 *
439 * IMPORTANT: The fact that a read or write operation will not block does NOT
440 * imply that the operation will succeed!
441 */
442
443static unsigned int poll(struct file *file, struct socket *sock,
444			 poll_table *wait)
445{
446	struct sock *sk = sock->sk;
447	u32 mask;
448
449	poll_wait(file, sk_sleep(sk), wait);
450
451	if (!skb_queue_empty(&sk->sk_receive_queue) ||
452	    (sock->state == SS_UNCONNECTED) ||
453	    (sock->state == SS_DISCONNECTING))
454		mask = (POLLRDNORM | POLLIN);
455	else
456		mask = 0;
457
458	if (sock->state == SS_DISCONNECTING)
459		mask |= POLLHUP;
460	else
461		mask |= POLLOUT;
462
463	return mask;
464}
465
466/**
467 * dest_name_check - verify user is permitted to send to specified port name
468 * @dest: destination address
469 * @m: descriptor for message to be sent
470 *
471 * Prevents restricted configuration commands from being issued by
472 * unauthorized users.
473 *
474 * Returns 0 if permission is granted, otherwise errno
475 */
476
477static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m)
478{
479	struct tipc_cfg_msg_hdr hdr;
480
481	if (likely(dest->addr.name.name.type >= TIPC_RESERVED_TYPES))
482		return 0;
483	if (likely(dest->addr.name.name.type == TIPC_TOP_SRV))
484		return 0;
485	if (likely(dest->addr.name.name.type != TIPC_CFG_SRV))
486		return -EACCES;
487
488	if (copy_from_user(&hdr, m->msg_iov[0].iov_base, sizeof(hdr)))
489		return -EFAULT;
490	if ((ntohs(hdr.tcm_type) & 0xC000) && (!capable(CAP_NET_ADMIN)))
491		return -EACCES;
492
493	return 0;
494}
495
496/**
497 * send_msg - send message in connectionless manner
498 * @iocb: if NULL, indicates that socket lock is already held
499 * @sock: socket structure
500 * @m: message to send
501 * @total_len: length of message
502 *
503 * Message must have an destination specified explicitly.
504 * Used for SOCK_RDM and SOCK_DGRAM messages,
505 * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections.
506 * (Note: 'SYN+' is prohibited on SOCK_STREAM.)
507 *
508 * Returns the number of bytes sent on success, or errno otherwise
509 */
510
511static int send_msg(struct kiocb *iocb, struct socket *sock,
512		    struct msghdr *m, size_t total_len)
513{
514	struct sock *sk = sock->sk;
515	struct tipc_port *tport = tipc_sk_port(sk);
516	struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name;
517	int needs_conn;
518	int res = -EINVAL;
519
520	if (unlikely(!dest))
521		return -EDESTADDRREQ;
522	if (unlikely((m->msg_namelen < sizeof(*dest)) ||
523		     (dest->family != AF_TIPC)))
524		return -EINVAL;
525
526	if (iocb)
527		lock_sock(sk);
528
529	needs_conn = (sock->state != SS_READY);
530	if (unlikely(needs_conn)) {
531		if (sock->state == SS_LISTENING) {
532			res = -EPIPE;
533			goto exit;
534		}
535		if (sock->state != SS_UNCONNECTED) {
536			res = -EISCONN;
537			goto exit;
538		}
539		if ((tport->published) ||
540		    ((sock->type == SOCK_STREAM) && (total_len != 0))) {
541			res = -EOPNOTSUPP;
542			goto exit;
543		}
544		if (dest->addrtype == TIPC_ADDR_NAME) {
545			tport->conn_type = dest->addr.name.name.type;
546			tport->conn_instance = dest->addr.name.name.instance;
547		}
548
549		/* Abort any pending connection attempts (very unlikely) */
550
551		reject_rx_queue(sk);
552	}
553
554	do {
555		if (dest->addrtype == TIPC_ADDR_NAME) {
556			if ((res = dest_name_check(dest, m)))
557				break;
558			res = tipc_send2name(tport->ref,
559					     &dest->addr.name.name,
560					     dest->addr.name.domain,
561					     m->msg_iovlen,
562					     m->msg_iov);
563		}
564		else if (dest->addrtype == TIPC_ADDR_ID) {
565			res = tipc_send2port(tport->ref,
566					     &dest->addr.id,
567					     m->msg_iovlen,
568					     m->msg_iov);
569		}
570		else if (dest->addrtype == TIPC_ADDR_MCAST) {
571			if (needs_conn) {
572				res = -EOPNOTSUPP;
573				break;
574			}
575			if ((res = dest_name_check(dest, m)))
576				break;
577			res = tipc_multicast(tport->ref,
578					     &dest->addr.nameseq,
579					     0,
580					     m->msg_iovlen,
581					     m->msg_iov);
582		}
583		if (likely(res != -ELINKCONG)) {
584			if (needs_conn && (res >= 0)) {
585				sock->state = SS_CONNECTING;
586			}
587			break;
588		}
589		if (m->msg_flags & MSG_DONTWAIT) {
590			res = -EWOULDBLOCK;
591			break;
592		}
593		release_sock(sk);
594		res = wait_event_interruptible(*sk_sleep(sk),
595					       !tport->congested);
596		lock_sock(sk);
597		if (res)
598			break;
599	} while (1);
600
601exit:
602	if (iocb)
603		release_sock(sk);
604	return res;
605}
606
607/**
608 * send_packet - send a connection-oriented message
609 * @iocb: if NULL, indicates that socket lock is already held
610 * @sock: socket structure
611 * @m: message to send
612 * @total_len: length of message
613 *
614 * Used for SOCK_SEQPACKET messages and SOCK_STREAM data.
615 *
616 * Returns the number of bytes sent on success, or errno otherwise
617 */
618
619static int send_packet(struct kiocb *iocb, struct socket *sock,
620		       struct msghdr *m, size_t total_len)
621{
622	struct sock *sk = sock->sk;
623	struct tipc_port *tport = tipc_sk_port(sk);
624	struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name;
625	int res;
626
627	/* Handle implied connection establishment */
628
629	if (unlikely(dest))
630		return send_msg(iocb, sock, m, total_len);
631
632	if (iocb)
633		lock_sock(sk);
634
635	do {
636		if (unlikely(sock->state != SS_CONNECTED)) {
637			if (sock->state == SS_DISCONNECTING)
638				res = -EPIPE;
639			else
640				res = -ENOTCONN;
641			break;
642		}
643
644		res = tipc_send(tport->ref, m->msg_iovlen, m->msg_iov);
645		if (likely(res != -ELINKCONG)) {
646			break;
647		}
648		if (m->msg_flags & MSG_DONTWAIT) {
649			res = -EWOULDBLOCK;
650			break;
651		}
652		release_sock(sk);
653		res = wait_event_interruptible(*sk_sleep(sk),
654			(!tport->congested || !tport->connected));
655		lock_sock(sk);
656		if (res)
657			break;
658	} while (1);
659
660	if (iocb)
661		release_sock(sk);
662	return res;
663}
664
665/**
666 * send_stream - send stream-oriented data
667 * @iocb: (unused)
668 * @sock: socket structure
669 * @m: data to send
670 * @total_len: total length of data to be sent
671 *
672 * Used for SOCK_STREAM data.
673 *
674 * Returns the number of bytes sent on success (or partial success),
675 * or errno if no data sent
676 */
677
678static int send_stream(struct kiocb *iocb, struct socket *sock,
679		       struct msghdr *m, size_t total_len)
680{
681	struct sock *sk = sock->sk;
682	struct tipc_port *tport = tipc_sk_port(sk);
683	struct msghdr my_msg;
684	struct iovec my_iov;
685	struct iovec *curr_iov;
686	int curr_iovlen;
687	char __user *curr_start;
688	u32 hdr_size;
689	int curr_left;
690	int bytes_to_send;
691	int bytes_sent;
692	int res;
693
694	lock_sock(sk);
695
696	/* Handle special cases where there is no connection */
697
698	if (unlikely(sock->state != SS_CONNECTED)) {
699		if (sock->state == SS_UNCONNECTED) {
700			res = send_packet(NULL, sock, m, total_len);
701			goto exit;
702		} else if (sock->state == SS_DISCONNECTING) {
703			res = -EPIPE;
704			goto exit;
705		} else {
706			res = -ENOTCONN;
707			goto exit;
708		}
709	}
710
711	if (unlikely(m->msg_name)) {
712		res = -EISCONN;
713		goto exit;
714	}
715
716	/*
717	 * Send each iovec entry using one or more messages
718	 *
719	 * Note: This algorithm is good for the most likely case
720	 * (i.e. one large iovec entry), but could be improved to pass sets
721	 * of small iovec entries into send_packet().
722	 */
723
724	curr_iov = m->msg_iov;
725	curr_iovlen = m->msg_iovlen;
726	my_msg.msg_iov = &my_iov;
727	my_msg.msg_iovlen = 1;
728	my_msg.msg_flags = m->msg_flags;
729	my_msg.msg_name = NULL;
730	bytes_sent = 0;
731
732	hdr_size = msg_hdr_sz(&tport->phdr);
733
734	while (curr_iovlen--) {
735		curr_start = curr_iov->iov_base;
736		curr_left = curr_iov->iov_len;
737
738		while (curr_left) {
739			bytes_to_send = tport->max_pkt - hdr_size;
740			if (bytes_to_send > TIPC_MAX_USER_MSG_SIZE)
741				bytes_to_send = TIPC_MAX_USER_MSG_SIZE;
742			if (curr_left < bytes_to_send)
743				bytes_to_send = curr_left;
744			my_iov.iov_base = curr_start;
745			my_iov.iov_len = bytes_to_send;
746			if ((res = send_packet(NULL, sock, &my_msg, 0)) < 0) {
747				if (bytes_sent)
748					res = bytes_sent;
749				goto exit;
750			}
751			curr_left -= bytes_to_send;
752			curr_start += bytes_to_send;
753			bytes_sent += bytes_to_send;
754		}
755
756		curr_iov++;
757	}
758	res = bytes_sent;
759exit:
760	release_sock(sk);
761	return res;
762}
763
/**
 * auto_connect - complete connection setup to a remote port
 * @sock: socket structure
 * @msg: peer's response message
 *
 * If @msg carries an error code the socket is moved to SS_DISCONNECTING.
 * Otherwise the message originator is recorded as the peer, the port is
 * connected to it, the port adopts the message's importance, and the socket
 * is moved to SS_CONNECTED.
 *
 * Returns 0 on success, -ECONNREFUSED if the response carries an error code
 */

static int auto_connect(struct socket *sock, struct tipc_msg *msg)
{
	struct tipc_sock *tsock = tipc_sk(sock->sk);
	struct tipc_portid *peer = &tsock->peer_name;

	if (msg_errcode(msg) != 0) {
		sock->state = SS_DISCONNECTING;
		return -ECONNREFUSED;
	}

	peer->ref = msg_origport(msg);
	peer->node = msg_orignode(msg);
	tipc_connect2port(tsock->p->ref, peer);
	tipc_set_portimportance(tsock->p->ref, msg_importance(msg));
	sock->state = SS_CONNECTED;

	return 0;
}
788
789/**
790 * set_orig_addr - capture sender's address for received message
791 * @m: descriptor for message info
792 * @msg: received message header
793 *
794 * Note: Address is not captured if not requested by receiver.
795 */
796
797static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
798{
799	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)m->msg_name;
800
801	if (addr) {
802		addr->family = AF_TIPC;
803		addr->addrtype = TIPC_ADDR_ID;
804		addr->addr.id.ref = msg_origport(msg);
805		addr->addr.id.node = msg_orignode(msg);
806		addr->addr.name.domain = 0;   	/* could leave uninitialized */
807		addr->scope = 0;   		/* could leave uninitialized */
808		m->msg_namelen = sizeof(struct sockaddr_tipc);
809	}
810}
811
812/**
813 * anc_data_recv - optionally capture ancillary data for received message
814 * @m: descriptor for message info
815 * @msg: received message header
816 * @tport: TIPC port associated with message
817 *
818 * Note: Ancillary data is not captured if not requested by receiver.
819 *
820 * Returns 0 if successful, otherwise errno
821 */
822
823static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
824				struct tipc_port *tport)
825{
826	u32 anc_data[3];
827	u32 err;
828	u32 dest_type;
829	int has_name;
830	int res;
831
832	if (likely(m->msg_controllen == 0))
833		return 0;
834
835	/* Optionally capture errored message object(s) */
836
837	err = msg ? msg_errcode(msg) : 0;
838	if (unlikely(err)) {
839		anc_data[0] = err;
840		anc_data[1] = msg_data_sz(msg);
841		if ((res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data)))
842			return res;
843		if (anc_data[1] &&
844		    (res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
845				    msg_data(msg))))
846			return res;
847	}
848
849	/* Optionally capture message destination object */
850
851	dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
852	switch (dest_type) {
853	case TIPC_NAMED_MSG:
854		has_name = 1;
855		anc_data[0] = msg_nametype(msg);
856		anc_data[1] = msg_namelower(msg);
857		anc_data[2] = msg_namelower(msg);
858		break;
859	case TIPC_MCAST_MSG:
860		has_name = 1;
861		anc_data[0] = msg_nametype(msg);
862		anc_data[1] = msg_namelower(msg);
863		anc_data[2] = msg_nameupper(msg);
864		break;
865	case TIPC_CONN_MSG:
866		has_name = (tport->conn_type != 0);
867		anc_data[0] = tport->conn_type;
868		anc_data[1] = tport->conn_instance;
869		anc_data[2] = tport->conn_instance;
870		break;
871	default:
872		has_name = 0;
873	}
874	if (has_name &&
875	    (res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data)))
876		return res;
877
878	return 0;
879}
880
881/**
882 * recv_msg - receive packet-oriented message
883 * @iocb: (unused)
884 * @m: descriptor for message info
885 * @buf_len: total size of user buffer area
886 * @flags: receive flags
887 *
888 * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages.
889 * If the complete message doesn't fit in user area, truncate it.
890 *
891 * Returns size of returned message data, errno otherwise
892 */
893
894static int recv_msg(struct kiocb *iocb, struct socket *sock,
895		    struct msghdr *m, size_t buf_len, int flags)
896{
897	struct sock *sk = sock->sk;
898	struct tipc_port *tport = tipc_sk_port(sk);
899	struct sk_buff *buf;
900	struct tipc_msg *msg;
901	unsigned int sz;
902	u32 err;
903	int res;
904
905	/* Catch invalid receive requests */
906
907	if (m->msg_iovlen != 1)
908		return -EOPNOTSUPP;   /* Don't do multiple iovec entries yet */
909
910	if (unlikely(!buf_len))
911		return -EINVAL;
912
913	lock_sock(sk);
914
915	if (unlikely(sock->state == SS_UNCONNECTED)) {
916		res = -ENOTCONN;
917		goto exit;
918	}
919
920restart:
921
922	/* Look for a message in receive queue; wait if necessary */
923
924	while (skb_queue_empty(&sk->sk_receive_queue)) {
925		if (sock->state == SS_DISCONNECTING) {
926			res = -ENOTCONN;
927			goto exit;
928		}
929		if (flags & MSG_DONTWAIT) {
930			res = -EWOULDBLOCK;
931			goto exit;
932		}
933		release_sock(sk);
934		res = wait_event_interruptible(*sk_sleep(sk),
935			(!skb_queue_empty(&sk->sk_receive_queue) ||
936			 (sock->state == SS_DISCONNECTING)));
937		lock_sock(sk);
938		if (res)
939			goto exit;
940	}
941
942	/* Look at first message in receive queue */
943
944	buf = skb_peek(&sk->sk_receive_queue);
945	msg = buf_msg(buf);
946	sz = msg_data_sz(msg);
947	err = msg_errcode(msg);
948
949	/* Complete connection setup for an implied connect */
950
951	if (unlikely(sock->state == SS_CONNECTING)) {
952		res = auto_connect(sock, msg);
953		if (res)
954			goto exit;
955	}
956
957	/* Discard an empty non-errored message & try again */
958
959	if ((!sz) && (!err)) {
960		advance_rx_queue(sk);
961		goto restart;
962	}
963
964	/* Capture sender's address (optional) */
965
966	set_orig_addr(m, msg);
967
968	/* Capture ancillary data (optional) */
969
970	res = anc_data_recv(m, msg, tport);
971	if (res)
972		goto exit;
973
974	/* Capture message data (if valid) & compute return value (always) */
975
976	if (!err) {
977		if (unlikely(buf_len < sz)) {
978			sz = buf_len;
979			m->msg_flags |= MSG_TRUNC;
980		}
981		if (unlikely(copy_to_user(m->msg_iov->iov_base, msg_data(msg),
982					  sz))) {
983			res = -EFAULT;
984			goto exit;
985		}
986		res = sz;
987	} else {
988		if ((sock->state == SS_READY) ||
989		    ((err == TIPC_CONN_SHUTDOWN) || m->msg_control))
990			res = 0;
991		else
992			res = -ECONNRESET;
993	}
994
995	/* Consume received message (optional) */
996
997	if (likely(!(flags & MSG_PEEK))) {
998		if ((sock->state != SS_READY) &&
999		    (++tport->conn_unacked >= TIPC_FLOW_CONTROL_WIN))
1000			tipc_acknowledge(tport->ref, tport->conn_unacked);
1001		advance_rx_queue(sk);
1002	}
1003exit:
1004	release_sock(sk);
1005	return res;
1006}
1007
1008/**
1009 * recv_stream - receive stream-oriented data
1010 * @iocb: (unused)
1011 * @m: descriptor for message info
1012 * @buf_len: total size of user buffer area
1013 * @flags: receive flags
1014 *
1015 * Used for SOCK_STREAM messages only.  If not enough data is available
1016 * will optionally wait for more; never truncates data.
1017 *
1018 * Returns size of returned message data, errno otherwise
1019 */
1020
1021static int recv_stream(struct kiocb *iocb, struct socket *sock,
1022		       struct msghdr *m, size_t buf_len, int flags)
1023{
1024	struct sock *sk = sock->sk;
1025	struct tipc_port *tport = tipc_sk_port(sk);
1026	struct sk_buff *buf;
1027	struct tipc_msg *msg;
1028	unsigned int sz;
1029	int sz_to_copy;
1030	int sz_copied = 0;
1031	int needed;
1032	char __user *crs = m->msg_iov->iov_base;
1033	unsigned char *buf_crs;
1034	u32 err;
1035	int res = 0;
1036
1037	/* Catch invalid receive attempts */
1038
1039	if (m->msg_iovlen != 1)
1040		return -EOPNOTSUPP;   /* Don't do multiple iovec entries yet */
1041
1042	if (unlikely(!buf_len))
1043		return -EINVAL;
1044
1045	lock_sock(sk);
1046
1047	if (unlikely((sock->state == SS_UNCONNECTED) ||
1048		     (sock->state == SS_CONNECTING))) {
1049		res = -ENOTCONN;
1050		goto exit;
1051	}
1052
1053restart:
1054
1055	/* Look for a message in receive queue; wait if necessary */
1056
1057	while (skb_queue_empty(&sk->sk_receive_queue)) {
1058		if (sock->state == SS_DISCONNECTING) {
1059			res = -ENOTCONN;
1060			goto exit;
1061		}
1062		if (flags & MSG_DONTWAIT) {
1063			res = -EWOULDBLOCK;
1064			goto exit;
1065		}
1066		release_sock(sk);
1067		res = wait_event_interruptible(*sk_sleep(sk),
1068			(!skb_queue_empty(&sk->sk_receive_queue) ||
1069			 (sock->state == SS_DISCONNECTING)));
1070		lock_sock(sk);
1071		if (res)
1072			goto exit;
1073	}
1074
1075	/* Look at first message in receive queue */
1076
1077	buf = skb_peek(&sk->sk_receive_queue);
1078	msg = buf_msg(buf);
1079	sz = msg_data_sz(msg);
1080	err = msg_errcode(msg);
1081
1082	/* Discard an empty non-errored message & try again */
1083
1084	if ((!sz) && (!err)) {
1085		advance_rx_queue(sk);
1086		goto restart;
1087	}
1088
1089	/* Optionally capture sender's address & ancillary data of first msg */
1090
1091	if (sz_copied == 0) {
1092		set_orig_addr(m, msg);
1093		res = anc_data_recv(m, msg, tport);
1094		if (res)
1095			goto exit;
1096	}
1097
1098	/* Capture message data (if valid) & compute return value (always) */
1099
1100	if (!err) {
1101		buf_crs = (unsigned char *)(TIPC_SKB_CB(buf)->handle);
1102		sz = (unsigned char *)msg + msg_size(msg) - buf_crs;
1103
1104		needed = (buf_len - sz_copied);
1105		sz_to_copy = (sz <= needed) ? sz : needed;
1106		if (unlikely(copy_to_user(crs, buf_crs, sz_to_copy))) {
1107			res = -EFAULT;
1108			goto exit;
1109		}
1110		sz_copied += sz_to_copy;
1111
1112		if (sz_to_copy < sz) {
1113			if (!(flags & MSG_PEEK))
1114				TIPC_SKB_CB(buf)->handle = buf_crs + sz_to_copy;
1115			goto exit;
1116		}
1117
1118		crs += sz_to_copy;
1119	} else {
1120		if (sz_copied != 0)
1121			goto exit; /* can't add error msg to valid data */
1122
1123		if ((err == TIPC_CONN_SHUTDOWN) || m->msg_control)
1124			res = 0;
1125		else
1126			res = -ECONNRESET;
1127	}
1128
1129	/* Consume received message (optional) */
1130
1131	if (likely(!(flags & MSG_PEEK))) {
1132		if (unlikely(++tport->conn_unacked >= TIPC_FLOW_CONTROL_WIN))
1133			tipc_acknowledge(tport->ref, tport->conn_unacked);
1134		advance_rx_queue(sk);
1135	}
1136
1137	/* Loop around if more data is required */
1138
1139	if ((sz_copied < buf_len) &&	/* didn't get all requested data */
1140	    (!skb_queue_empty(&sk->sk_receive_queue) ||
1141	     (flags & MSG_WAITALL)) &&	/* and more is ready or required */
1142	    (!(flags & MSG_PEEK)) &&	/* and aren't just peeking at data */
1143	    (!err))			/* and haven't reached a FIN */
1144		goto restart;
1145
1146exit:
1147	release_sock(sk);
1148	return sz_copied ? sz_copied : res;
1149}
1150
1151/**
1152 * rx_queue_full - determine if receive queue can accept another message
1153 * @msg: message to be added to queue
1154 * @queue_size: current size of queue
1155 * @base: nominal maximum size of queue
1156 *
1157 * Returns 1 if queue is unable to accept message, 0 otherwise
1158 */
1159
1160static int rx_queue_full(struct tipc_msg *msg, u32 queue_size, u32 base)
1161{
1162	u32 threshold;
1163	u32 imp = msg_importance(msg);
1164
1165	if (imp == TIPC_LOW_IMPORTANCE)
1166		threshold = base;
1167	else if (imp == TIPC_MEDIUM_IMPORTANCE)
1168		threshold = base * 2;
1169	else if (imp == TIPC_HIGH_IMPORTANCE)
1170		threshold = base * 100;
1171	else
1172		return 0;
1173
1174	if (msg_connected(msg))
1175		threshold *= 4;
1176
1177	return (queue_size >= threshold);
1178}
1179
1180/**
1181 * filter_rcv - validate incoming message
1182 * @sk: socket
1183 * @buf: message
1184 *
1185 * Enqueues message on receive queue if acceptable; optionally handles
1186 * disconnect indication for a connected socket.
1187 *
1188 * Called with socket lock already taken; port lock may also be taken.
1189 *
1190 * Returns TIPC error status code (TIPC_OK if message is not to be rejected)
1191 */
1192
1193static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
1194{
1195	struct socket *sock = sk->sk_socket;
1196	struct tipc_msg *msg = buf_msg(buf);
1197	u32 recv_q_len;
1198
1199	/* Reject message if it is wrong sort of message for socket */
1200
1201	/*
1202	 * WOULD IT BE BETTER TO JUST DISCARD THESE MESSAGES INSTEAD?
1203	 * "NO PORT" ISN'T REALLY THE RIGHT ERROR CODE, AND THERE MAY
1204	 * BE SECURITY IMPLICATIONS INHERENT IN REJECTING INVALID TRAFFIC
1205	 */
1206
1207	if (sock->state == SS_READY) {
1208		if (msg_connected(msg)) {
1209			msg_dbg(msg, "dispatch filter 1\n");
1210			return TIPC_ERR_NO_PORT;
1211		}
1212	} else {
1213		if (msg_mcast(msg)) {
1214			msg_dbg(msg, "dispatch filter 2\n");
1215			return TIPC_ERR_NO_PORT;
1216		}
1217		if (sock->state == SS_CONNECTED) {
1218			if (!msg_connected(msg)) {
1219				msg_dbg(msg, "dispatch filter 3\n");
1220				return TIPC_ERR_NO_PORT;
1221			}
1222		}
1223		else if (sock->state == SS_CONNECTING) {
1224			if (!msg_connected(msg) && (msg_errcode(msg) == 0)) {
1225				msg_dbg(msg, "dispatch filter 4\n");
1226				return TIPC_ERR_NO_PORT;
1227			}
1228		}
1229		else if (sock->state == SS_LISTENING) {
1230			if (msg_connected(msg) || msg_errcode(msg)) {
1231				msg_dbg(msg, "dispatch filter 5\n");
1232				return TIPC_ERR_NO_PORT;
1233			}
1234		}
1235		else if (sock->state == SS_DISCONNECTING) {
1236			msg_dbg(msg, "dispatch filter 6\n");
1237			return TIPC_ERR_NO_PORT;
1238		}
1239		else /* (sock->state == SS_UNCONNECTED) */ {
1240			if (msg_connected(msg) || msg_errcode(msg)) {
1241				msg_dbg(msg, "dispatch filter 7\n");
1242				return TIPC_ERR_NO_PORT;
1243			}
1244		}
1245	}
1246
1247	/* Reject message if there isn't room to queue it */
1248
1249	recv_q_len = (u32)atomic_read(&tipc_queue_size);
1250	if (unlikely(recv_q_len >= OVERLOAD_LIMIT_BASE)) {
1251		if (rx_queue_full(msg, recv_q_len, OVERLOAD_LIMIT_BASE))
1252			return TIPC_ERR_OVERLOAD;
1253	}
1254	recv_q_len = skb_queue_len(&sk->sk_receive_queue);
1255	if (unlikely(recv_q_len >= (OVERLOAD_LIMIT_BASE / 2))) {
1256		if (rx_queue_full(msg, recv_q_len, OVERLOAD_LIMIT_BASE / 2))
1257			return TIPC_ERR_OVERLOAD;
1258	}
1259
1260	/* Enqueue message (finally!) */
1261
1262	msg_dbg(msg, "<DISP<: ");
1263	TIPC_SKB_CB(buf)->handle = msg_data(msg);
1264	atomic_inc(&tipc_queue_size);
1265	__skb_queue_tail(&sk->sk_receive_queue, buf);
1266
1267	/* Initiate connection termination for an incoming 'FIN' */
1268
1269	if (unlikely(msg_errcode(msg) && (sock->state == SS_CONNECTED))) {
1270		sock->state = SS_DISCONNECTING;
1271		tipc_disconnect_port(tipc_sk_port(sk));
1272	}
1273
1274	if (waitqueue_active(sk_sleep(sk)))
1275		wake_up_interruptible(sk_sleep(sk));
1276	return TIPC_OK;
1277}
1278
1279/**
1280 * backlog_rcv - handle incoming message from backlog queue
1281 * @sk: socket
1282 * @buf: message
1283 *
1284 * Caller must hold socket lock, but not port lock.
1285 *
1286 * Returns 0
1287 */
1288
1289static int backlog_rcv(struct sock *sk, struct sk_buff *buf)
1290{
1291	u32 res;
1292
1293	res = filter_rcv(sk, buf);
1294	if (res)
1295		tipc_reject_msg(buf, res);
1296	return 0;
1297}
1298
1299/**
1300 * dispatch - handle incoming message
1301 * @tport: TIPC port that received message
1302 * @buf: message
1303 *
1304 * Called with port lock already taken.
1305 *
1306 * Returns TIPC error status code (TIPC_OK if message is not to be rejected)
1307 */
1308
1309static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf)
1310{
1311	struct sock *sk = (struct sock *)tport->usr_handle;
1312	u32 res;
1313
1314	/*
1315	 * Process message if socket is unlocked; otherwise add to backlog queue
1316	 *
1317	 * This code is based on sk_receive_skb(), but must be distinct from it
1318	 * since a TIPC-specific filter/reject mechanism is utilized
1319	 */
1320
1321	bh_lock_sock(sk);
1322	if (!sock_owned_by_user(sk)) {
1323		res = filter_rcv(sk, buf);
1324	} else {
1325		if (sk_add_backlog(sk, buf))
1326			res = TIPC_ERR_OVERLOAD;
1327		else
1328			res = TIPC_OK;
1329	}
1330	bh_unlock_sock(sk);
1331
1332	return res;
1333}
1334
1335/**
1336 * wakeupdispatch - wake up port after congestion
1337 * @tport: port to wakeup
1338 *
1339 * Called with port lock already taken.
1340 */
1341
1342static void wakeupdispatch(struct tipc_port *tport)
1343{
1344	struct sock *sk = (struct sock *)tport->usr_handle;
1345
1346	if (waitqueue_active(sk_sleep(sk)))
1347		wake_up_interruptible(sk_sleep(sk));
1348}
1349
1350/**
1351 * connect - establish a connection to another TIPC port
1352 * @sock: socket structure
1353 * @dest: socket address for destination port
1354 * @destlen: size of socket address data structure
1355 * @flags: file-related flags associated with socket
1356 *
1357 * Returns 0 on success, errno otherwise
1358 */
1359
1360static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
1361		   int flags)
1362{
1363	struct sock *sk = sock->sk;
1364	struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
1365	struct msghdr m = {NULL,};
1366	struct sk_buff *buf;
1367	struct tipc_msg *msg;
1368	int res;
1369
1370	lock_sock(sk);
1371
1372	/* For now, TIPC does not allow use of connect() with DGRAM/RDM types */
1373
1374	if (sock->state == SS_READY) {
1375		res = -EOPNOTSUPP;
1376		goto exit;
1377	}
1378
1379	/* For now, TIPC does not support the non-blocking form of connect() */
1380
1381	if (flags & O_NONBLOCK) {
1382		res = -EWOULDBLOCK;
1383		goto exit;
1384	}
1385
1386	/* Issue Posix-compliant error code if socket is in the wrong state */
1387
1388	if (sock->state == SS_LISTENING) {
1389		res = -EOPNOTSUPP;
1390		goto exit;
1391	}
1392	if (sock->state == SS_CONNECTING) {
1393		res = -EALREADY;
1394		goto exit;
1395	}
1396	if (sock->state != SS_UNCONNECTED) {
1397		res = -EISCONN;
1398		goto exit;
1399	}
1400
1401	/*
1402	 * Reject connection attempt using multicast address
1403	 *
1404	 * Note: send_msg() validates the rest of the address fields,
1405	 *       so there's no need to do it here
1406	 */
1407
1408	if (dst->addrtype == TIPC_ADDR_MCAST) {
1409		res = -EINVAL;
1410		goto exit;
1411	}
1412
1413	/* Reject any messages already in receive queue (very unlikely) */
1414
1415	reject_rx_queue(sk);
1416
1417	/* Send a 'SYN-' to destination */
1418
1419	m.msg_name = dest;
1420	m.msg_namelen = destlen;
1421	res = send_msg(NULL, sock, &m, 0);
1422	if (res < 0) {
1423		goto exit;
1424	}
1425
1426	/* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
1427
1428	release_sock(sk);
1429	res = wait_event_interruptible_timeout(*sk_sleep(sk),
1430			(!skb_queue_empty(&sk->sk_receive_queue) ||
1431			(sock->state != SS_CONNECTING)),
1432			sk->sk_rcvtimeo);
1433	lock_sock(sk);
1434
1435	if (res > 0) {
1436		buf = skb_peek(&sk->sk_receive_queue);
1437		if (buf != NULL) {
1438			msg = buf_msg(buf);
1439			res = auto_connect(sock, msg);
1440			if (!res) {
1441				if (!msg_data_sz(msg))
1442					advance_rx_queue(sk);
1443			}
1444		} else {
1445			if (sock->state == SS_CONNECTED) {
1446				res = -EISCONN;
1447			} else {
1448				res = -ECONNREFUSED;
1449			}
1450		}
1451	} else {
1452		if (res == 0)
1453			res = -ETIMEDOUT;
1454		else
1455			; /* leave "res" unchanged */
1456		sock->state = SS_DISCONNECTING;
1457	}
1458
1459exit:
1460	release_sock(sk);
1461	return res;
1462}
1463
1464/**
1465 * listen - allow socket to listen for incoming connections
1466 * @sock: socket structure
1467 * @len: (unused)
1468 *
1469 * Returns 0 on success, errno otherwise
1470 */
1471
1472static int listen(struct socket *sock, int len)
1473{
1474	struct sock *sk = sock->sk;
1475	int res;
1476
1477	lock_sock(sk);
1478
1479	if (sock->state == SS_READY)
1480		res = -EOPNOTSUPP;
1481	else if (sock->state != SS_UNCONNECTED)
1482		res = -EINVAL;
1483	else {
1484		sock->state = SS_LISTENING;
1485		res = 0;
1486	}
1487
1488	release_sock(sk);
1489	return res;
1490}
1491
1492/**
1493 * accept - wait for connection request
1494 * @sock: listening socket
1495 * @newsock: new socket that is to be connected
1496 * @flags: file-related flags associated with socket
1497 *
1498 * Returns 0 on success, errno otherwise
1499 */
1500
1501static int accept(struct socket *sock, struct socket *new_sock, int flags)
1502{
1503	struct sock *sk = sock->sk;
1504	struct sk_buff *buf;
1505	int res;
1506
1507	lock_sock(sk);
1508
1509	if (sock->state == SS_READY) {
1510		res = -EOPNOTSUPP;
1511		goto exit;
1512	}
1513	if (sock->state != SS_LISTENING) {
1514		res = -EINVAL;
1515		goto exit;
1516	}
1517
1518	while (skb_queue_empty(&sk->sk_receive_queue)) {
1519		if (flags & O_NONBLOCK) {
1520			res = -EWOULDBLOCK;
1521			goto exit;
1522		}
1523		release_sock(sk);
1524		res = wait_event_interruptible(*sk_sleep(sk),
1525				(!skb_queue_empty(&sk->sk_receive_queue)));
1526		lock_sock(sk);
1527		if (res)
1528			goto exit;
1529	}
1530
1531	buf = skb_peek(&sk->sk_receive_queue);
1532
1533	res = tipc_create(sock_net(sock->sk), new_sock, 0, 0);
1534	if (!res) {
1535		struct sock *new_sk = new_sock->sk;
1536		struct tipc_sock *new_tsock = tipc_sk(new_sk);
1537		struct tipc_port *new_tport = new_tsock->p;
1538		u32 new_ref = new_tport->ref;
1539		struct tipc_msg *msg = buf_msg(buf);
1540
1541		lock_sock(new_sk);
1542
1543		/*
1544		 * Reject any stray messages received by new socket
1545		 * before the socket lock was taken (very, very unlikely)
1546		 */
1547
1548		reject_rx_queue(new_sk);
1549
1550		/* Connect new socket to it's peer */
1551
1552		new_tsock->peer_name.ref = msg_origport(msg);
1553		new_tsock->peer_name.node = msg_orignode(msg);
1554		tipc_connect2port(new_ref, &new_tsock->peer_name);
1555		new_sock->state = SS_CONNECTED;
1556
1557		tipc_set_portimportance(new_ref, msg_importance(msg));
1558		if (msg_named(msg)) {
1559			new_tport->conn_type = msg_nametype(msg);
1560			new_tport->conn_instance = msg_nameinst(msg);
1561		}
1562
1563		/*
1564		 * Respond to 'SYN-' by discarding it & returning 'ACK'-.
1565		 * Respond to 'SYN+' by queuing it on new socket.
1566		 */
1567
1568		msg_dbg(msg,"<ACC<: ");
1569		if (!msg_data_sz(msg)) {
1570			struct msghdr m = {NULL,};
1571
1572			advance_rx_queue(sk);
1573			send_packet(NULL, new_sock, &m, 0);
1574		} else {
1575			__skb_dequeue(&sk->sk_receive_queue);
1576			__skb_queue_head(&new_sk->sk_receive_queue, buf);
1577		}
1578		release_sock(new_sk);
1579	}
1580exit:
1581	release_sock(sk);
1582	return res;
1583}
1584
1585/**
1586 * shutdown - shutdown socket connection
1587 * @sock: socket structure
1588 * @how: direction to close (must be SHUT_RDWR)
1589 *
1590 * Terminates connection (if necessary), then purges socket's receive queue.
1591 *
1592 * Returns 0 on success, errno otherwise
1593 */
1594
1595static int shutdown(struct socket *sock, int how)
1596{
1597	struct sock *sk = sock->sk;
1598	struct tipc_port *tport = tipc_sk_port(sk);
1599	struct sk_buff *buf;
1600	int res;
1601
1602	if (how != SHUT_RDWR)
1603		return -EINVAL;
1604
1605	lock_sock(sk);
1606
1607	switch (sock->state) {
1608	case SS_CONNECTING:
1609	case SS_CONNECTED:
1610
1611		/* Disconnect and send a 'FIN+' or 'FIN-' message to peer */
1612restart:
1613		buf = __skb_dequeue(&sk->sk_receive_queue);
1614		if (buf) {
1615			atomic_dec(&tipc_queue_size);
1616			if (TIPC_SKB_CB(buf)->handle != msg_data(buf_msg(buf))) {
1617				buf_discard(buf);
1618				goto restart;
1619			}
1620			tipc_disconnect(tport->ref);
1621			tipc_reject_msg(buf, TIPC_CONN_SHUTDOWN);
1622		} else {
1623			tipc_shutdown(tport->ref);
1624		}
1625
1626		sock->state = SS_DISCONNECTING;
1627
1628		/* fall through */
1629
1630	case SS_DISCONNECTING:
1631
1632		/* Discard any unreceived messages; wake up sleeping tasks */
1633
1634		discard_rx_queue(sk);
1635		if (waitqueue_active(sk_sleep(sk)))
1636			wake_up_interruptible(sk_sleep(sk));
1637		res = 0;
1638		break;
1639
1640	default:
1641		res = -ENOTCONN;
1642	}
1643
1644	release_sock(sk);
1645	return res;
1646}
1647
1648/**
1649 * setsockopt - set socket option
1650 * @sock: socket structure
1651 * @lvl: option level
1652 * @opt: option identifier
1653 * @ov: pointer to new option value
1654 * @ol: length of option value
1655 *
1656 * For stream sockets only, accepts and ignores all IPPROTO_TCP options
1657 * (to ease compatibility).
1658 *
1659 * Returns 0 on success, errno otherwise
1660 */
1661
1662static int setsockopt(struct socket *sock,
1663		      int lvl, int opt, char __user *ov, unsigned int ol)
1664{
1665	struct sock *sk = sock->sk;
1666	struct tipc_port *tport = tipc_sk_port(sk);
1667	u32 value;
1668	int res;
1669
1670	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
1671		return 0;
1672	if (lvl != SOL_TIPC)
1673		return -ENOPROTOOPT;
1674	if (ol < sizeof(value))
1675		return -EINVAL;
1676	if ((res = get_user(value, (u32 __user *)ov)))
1677		return res;
1678
1679	lock_sock(sk);
1680
1681	switch (opt) {
1682	case TIPC_IMPORTANCE:
1683		res = tipc_set_portimportance(tport->ref, value);
1684		break;
1685	case TIPC_SRC_DROPPABLE:
1686		if (sock->type != SOCK_STREAM)
1687			res = tipc_set_portunreliable(tport->ref, value);
1688		else
1689			res = -ENOPROTOOPT;
1690		break;
1691	case TIPC_DEST_DROPPABLE:
1692		res = tipc_set_portunreturnable(tport->ref, value);
1693		break;
1694	case TIPC_CONN_TIMEOUT:
1695		sk->sk_rcvtimeo = msecs_to_jiffies(value);
1696		/* no need to set "res", since already 0 at this point */
1697		break;
1698	default:
1699		res = -EINVAL;
1700	}
1701
1702	release_sock(sk);
1703
1704	return res;
1705}
1706
1707/**
1708 * getsockopt - get socket option
1709 * @sock: socket structure
1710 * @lvl: option level
1711 * @opt: option identifier
1712 * @ov: receptacle for option value
1713 * @ol: receptacle for length of option value
1714 *
1715 * For stream sockets only, returns 0 length result for all IPPROTO_TCP options
1716 * (to ease compatibility).
1717 *
1718 * Returns 0 on success, errno otherwise
1719 */
1720
1721static int getsockopt(struct socket *sock,
1722		      int lvl, int opt, char __user *ov, int __user *ol)
1723{
1724	struct sock *sk = sock->sk;
1725	struct tipc_port *tport = tipc_sk_port(sk);
1726	int len;
1727	u32 value;
1728	int res;
1729
1730	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
1731		return put_user(0, ol);
1732	if (lvl != SOL_TIPC)
1733		return -ENOPROTOOPT;
1734	if ((res = get_user(len, ol)))
1735		return res;
1736
1737	lock_sock(sk);
1738
1739	switch (opt) {
1740	case TIPC_IMPORTANCE:
1741		res = tipc_portimportance(tport->ref, &value);
1742		break;
1743	case TIPC_SRC_DROPPABLE:
1744		res = tipc_portunreliable(tport->ref, &value);
1745		break;
1746	case TIPC_DEST_DROPPABLE:
1747		res = tipc_portunreturnable(tport->ref, &value);
1748		break;
1749	case TIPC_CONN_TIMEOUT:
1750		value = jiffies_to_msecs(sk->sk_rcvtimeo);
1751		/* no need to set "res", since already 0 at this point */
1752		break;
1753	 case TIPC_NODE_RECVQ_DEPTH:
1754		value = (u32)atomic_read(&tipc_queue_size);
1755		break;
1756	 case TIPC_SOCK_RECVQ_DEPTH:
1757		value = skb_queue_len(&sk->sk_receive_queue);
1758		break;
1759	default:
1760		res = -EINVAL;
1761	}
1762
1763	release_sock(sk);
1764
1765	if (res) {
1766		/* "get" failed */
1767	}
1768	else if (len < sizeof(value)) {
1769		res = -EINVAL;
1770	}
1771	else if (copy_to_user(ov, &value, sizeof(value))) {
1772		res = -EFAULT;
1773	}
1774	else {
1775		res = put_user(sizeof(value), ol);
1776	}
1777
1778	return res;
1779}
1780
1781/**
1782 * Protocol switches for the various types of TIPC sockets
1783 */
1784
1785static const struct proto_ops msg_ops = {
1786	.owner 		= THIS_MODULE,
1787	.family		= AF_TIPC,
1788	.release	= release,
1789	.bind		= bind,
1790	.connect	= connect,
1791	.socketpair	= sock_no_socketpair,
1792	.accept		= accept,
1793	.getname	= get_name,
1794	.poll		= poll,
1795	.ioctl		= sock_no_ioctl,
1796	.listen		= listen,
1797	.shutdown	= shutdown,
1798	.setsockopt	= setsockopt,
1799	.getsockopt	= getsockopt,
1800	.sendmsg	= send_msg,
1801	.recvmsg	= recv_msg,
1802	.mmap		= sock_no_mmap,
1803	.sendpage	= sock_no_sendpage
1804};
1805
1806static const struct proto_ops packet_ops = {
1807	.owner 		= THIS_MODULE,
1808	.family		= AF_TIPC,
1809	.release	= release,
1810	.bind		= bind,
1811	.connect	= connect,
1812	.socketpair	= sock_no_socketpair,
1813	.accept		= accept,
1814	.getname	= get_name,
1815	.poll		= poll,
1816	.ioctl		= sock_no_ioctl,
1817	.listen		= listen,
1818	.shutdown	= shutdown,
1819	.setsockopt	= setsockopt,
1820	.getsockopt	= getsockopt,
1821	.sendmsg	= send_packet,
1822	.recvmsg	= recv_msg,
1823	.mmap		= sock_no_mmap,
1824	.sendpage	= sock_no_sendpage
1825};
1826
1827static const struct proto_ops stream_ops = {
1828	.owner 		= THIS_MODULE,
1829	.family		= AF_TIPC,
1830	.release	= release,
1831	.bind		= bind,
1832	.connect	= connect,
1833	.socketpair	= sock_no_socketpair,
1834	.accept		= accept,
1835	.getname	= get_name,
1836	.poll		= poll,
1837	.ioctl		= sock_no_ioctl,
1838	.listen		= listen,
1839	.shutdown	= shutdown,
1840	.setsockopt	= setsockopt,
1841	.getsockopt	= getsockopt,
1842	.sendmsg	= send_stream,
1843	.recvmsg	= recv_stream,
1844	.mmap		= sock_no_mmap,
1845	.sendpage	= sock_no_sendpage
1846};
1847
1848static const struct net_proto_family tipc_family_ops = {
1849	.owner 		= THIS_MODULE,
1850	.family		= AF_TIPC,
1851	.create		= tipc_create
1852};
1853
1854static struct proto tipc_proto = {
1855	.name		= "TIPC",
1856	.owner		= THIS_MODULE,
1857	.obj_size	= sizeof(struct tipc_sock)
1858};
1859
1860/**
1861 * tipc_socket_init - initialize TIPC socket interface
1862 *
1863 * Returns 0 on success, errno otherwise
1864 */
1865int tipc_socket_init(void)
1866{
1867	int res;
1868
1869	res = proto_register(&tipc_proto, 1);
1870	if (res) {
1871		err("Failed to register TIPC protocol type\n");
1872		goto out;
1873	}
1874
1875	res = sock_register(&tipc_family_ops);
1876	if (res) {
1877		err("Failed to register TIPC socket type\n");
1878		proto_unregister(&tipc_proto);
1879		goto out;
1880	}
1881
1882	sockets_enabled = 1;
1883 out:
1884	return res;
1885}
1886
1887/**
1888 * tipc_socket_stop - stop TIPC socket interface
1889 */
1890
1891void tipc_socket_stop(void)
1892{
1893	if (!sockets_enabled)
1894		return;
1895
1896	sockets_enabled = 0;
1897	sock_unregister(tipc_family_ops.family);
1898	proto_unregister(&tipc_proto);
1899}
1900