1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		The User Datagram Protocol (UDP).
7 *
8 * Version:	$Id: udp.c,v 1.1.1.1 2008/10/15 03:27:33 james26_jang Exp $
9 *
10 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
11 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
13 *		Alan Cox, <Alan.Cox@linux.org>
14 *
15 * Fixes:
16 *		Alan Cox	:	verify_area() calls
17 *		Alan Cox	: 	stopped close while in use off icmp
18 *					messages. Not a fix but a botch that
19 *					for udp at least is 'valid'.
20 *		Alan Cox	:	Fixed icmp handling properly
21 *		Alan Cox	: 	Correct error for oversized datagrams
22 *		Alan Cox	:	Tidied select() semantics.
23 *		Alan Cox	:	udp_err() fixed properly, also now
24 *					select and read wake correctly on errors
25 *		Alan Cox	:	udp_send verify_area moved to avoid mem leak
26 *		Alan Cox	:	UDP can count its memory
27 *		Alan Cox	:	send to an unknown connection causes
28 *					an ECONNREFUSED off the icmp, but
29 *					does NOT close.
30 *		Alan Cox	:	Switched to new sk_buff handlers. No more backlog!
31 *		Alan Cox	:	Using generic datagram code. Even smaller and the PEEK
32 *					bug no longer crashes it.
33 *		Fred Van Kempen	: 	Net2e support for sk->broadcast.
34 *		Alan Cox	:	Uses skb_free_datagram
35 *		Alan Cox	:	Added get/set sockopt support.
36 *		Alan Cox	:	Broadcasting without option set returns EACCES.
37 *		Alan Cox	:	No wakeup calls. Instead we now use the callbacks.
38 *		Alan Cox	:	Use ip_tos and ip_ttl
39 *		Alan Cox	:	SNMP Mibs
40 *		Alan Cox	:	MSG_DONTROUTE, and 0.0.0.0 support.
41 *		Matt Dillon	:	UDP length checks.
42 *		Alan Cox	:	Smarter af_inet used properly.
43 *		Alan Cox	:	Use new kernel side addressing.
44 *		Alan Cox	:	Incorrect return on truncated datagram receive.
45 *	Arnt Gulbrandsen 	:	New udp_send and stuff
46 *		Alan Cox	:	Cache last socket
47 *		Alan Cox	:	Route cache
48 *		Jon Peatfield	:	Minor efficiency fix to sendto().
49 *		Mike Shaver	:	RFC1122 checks.
50 *		Alan Cox	:	Nonblocking error fix.
51 *	Willy Konynenberg	:	Transparent proxying support.
52 *		Mike McLagan	:	Routing by source
53 *		David S. Miller	:	New socket lookup architecture.
54 *					Last socket cache retained as it
55 *					does have a high hit rate.
56 *		Olaf Kirch	:	Don't linearise iovec on sendmsg.
57 *		Andi Kleen	:	Some cleanups, cache destination entry
58 *					for connect.
59 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
60 *		Melvin Smith	:	Check msg_name not msg_namelen in sendto(),
61 *					return ENOTCONN for unconnected sockets (POSIX)
62 *		Janos Farkas	:	don't deliver multi/broadcasts to a different
63 *					bound-to-device socket
64 *
65 *
66 *		This program is free software; you can redistribute it and/or
67 *		modify it under the terms of the GNU General Public License
68 *		as published by the Free Software Foundation; either version
69 *		2 of the License, or (at your option) any later version.
70 */
71
72#include <asm/system.h>
73#include <asm/uaccess.h>
74#include <asm/ioctls.h>
75#include <linux/types.h>
76#include <linux/fcntl.h>
77#include <linux/socket.h>
78#include <linux/sockios.h>
79#include <linux/in.h>
80#include <linux/errno.h>
81#include <linux/timer.h>
82#include <linux/mm.h>
83#include <linux/config.h>
84#include <linux/inet.h>
85#include <linux/netdevice.h>
86#include <net/snmp.h>
87#include <net/ip.h>
88#include <net/protocol.h>
89#include <linux/skbuff.h>
90#include <net/sock.h>
91#include <net/udp.h>
92#include <net/icmp.h>
93#include <net/route.h>
94#include <net/inet_common.h>
95#include <net/checksum.h>
96
97/*
98 *	Snmp MIB for the UDP layer
99 */
100
/* Per-CPU UDP MIB counters; NR_CPUS*2 slots — presumably separate
 * softirq/user-context slots per CPU (see UDP_INC_STATS_{BH,USER}). */
struct udp_mib		udp_statistics[NR_CPUS*2];

/* Hash table of bound UDP sockets, keyed by local port number. */
struct sock *udp_hash[UDP_HTABLE_SIZE];
/* Protects udp_hash and the pprev/next chain links of its sockets. */
rwlock_t udp_hash_lock = RW_LOCK_UNLOCKED;

/* Shared by v4/v6 udp: last ephemeral port handed out by get_port. */
int udp_port_rover;
108
/*
 * udp_v4_get_port - bind @sk to local port @snum (host byte order).
 *
 * With @snum == 0 an ephemeral port is chosen: pass 1 scans one slot of
 * each hash chain looking for an empty or shortest chain, pass 2 probes
 * successive ports hashing onto that chain until one is unused.  With an
 * explicit @snum, the bind fails if another socket already holds the port
 * with a conflicting address/device binding and without SO_REUSEADDR on
 * both sides.
 *
 * Returns 0 on success, 1 on failure.  Takes udp_hash_lock for writing.
 */
static int udp_v4_get_port(struct sock *sk, unsigned short snum)
{
	write_lock_bh(&udp_hash_lock);
	if (snum == 0) {
		int best_size_so_far, best, result, i;

		/* Restart the rover if it drifted outside the sysctl'ed
		 * local port range. */
		if (udp_port_rover > sysctl_local_port_range[1] ||
		    udp_port_rover < sysctl_local_port_range[0])
			udp_port_rover = sysctl_local_port_range[0];
		best_size_so_far = 32767;
		best = result = udp_port_rover;
		/* Pass 1: find the least-populated hash chain. */
		for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
			struct sock *sk;
			int size;

			sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)];
			if (!sk) {
				/* Empty chain: take this port, wrapped back
				 * into the allowed range if needed. */
				if (result > sysctl_local_port_range[1])
					result = sysctl_local_port_range[0] +
						((result - sysctl_local_port_range[0]) &
						 (UDP_HTABLE_SIZE - 1));
				goto gotit;
			}
			size = 0;
			do {
				if (++size >= best_size_so_far)
					goto next;
			} while ((sk = sk->next) != NULL);
			best_size_so_far = size;
			best = result;
		next:;
		}
		/* Pass 2: walk ports that hash onto the shortest chain. */
		result = best;
		for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) {
			if (result > sysctl_local_port_range[1])
				result = sysctl_local_port_range[0]
					+ ((result - sysctl_local_port_range[0]) &
					   (UDP_HTABLE_SIZE - 1));
			if (!udp_lport_inuse(result))
				break;
		}
		if (i >= (1 << 16) / UDP_HTABLE_SIZE)
			goto fail;
gotit:
		/* Remember where to continue next time. */
		udp_port_rover = snum = result;
	} else {
		struct sock *sk2;

		/* Explicit port: scan its chain for binding conflicts. */
		for (sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
		     sk2 != NULL;
		     sk2 = sk2->next) {
			if (sk2->num == snum &&
			    sk2 != sk &&
			    sk2->bound_dev_if == sk->bound_dev_if &&
			    (!sk2->rcv_saddr ||
			     !sk->rcv_saddr ||
			     sk2->rcv_saddr == sk->rcv_saddr) &&
			    (!sk2->reuse || !sk->reuse))
				goto fail;
		}
	}
	sk->num = snum;
	/* Insert at the head of the chain unless already hashed
	 * (pprev != NULL means the socket is on a chain). */
	if (sk->pprev == NULL) {
		struct sock **skp = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
		if ((sk->next = *skp) != NULL)
			(*skp)->pprev = &sk->next;
		*skp = sk;
		sk->pprev = skp;
		sock_prot_inc_use(sk->prot);
		sock_hold(sk);	/* hash table holds a reference */
	}
	write_unlock_bh(&udp_hash_lock);
	return 0;

fail:
	write_unlock_bh(&udp_hash_lock);
	return 1;
}
187
/*
 * UDP sockets are placed on the hash table only by udp_v4_get_port();
 * the generic ->hash callback must never be invoked, so trap it hard.
 */
static void udp_v4_hash(struct sock *sk)
{
	BUG();
}
192
/*
 * Remove @sk from the UDP hash table (if it is on it), clear its local
 * port, and drop the reference the hash table held.
 */
static void udp_v4_unhash(struct sock *sk)
{
	write_lock_bh(&udp_hash_lock);
	if (sk->pprev) {
		/* Standard doubly-linked (pprev) unlink. */
		if (sk->next)
			sk->next->pprev = sk->pprev;
		*sk->pprev = sk->next;
		sk->pprev = NULL;
		sk->num = 0;
		sock_prot_dec_use(sk->prot);
		__sock_put(sk);	/* release the hash table's reference */
	}
	write_unlock_bh(&udp_hash_lock);
}
207
208/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
209 * harder than this. -DaveM
210 */
211struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
212{
213	struct sock *sk, *result = NULL;
214	unsigned short hnum = ntohs(dport);
215	int badness = -1;
216
217	for(sk = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; sk != NULL; sk = sk->next) {
218		if(sk->num == hnum) {
219			int score = 0;
220			if(sk->rcv_saddr) {
221				if(sk->rcv_saddr != daddr)
222					continue;
223				score++;
224			}
225			if(sk->daddr) {
226				if(sk->daddr != saddr)
227					continue;
228				score++;
229			}
230			if(sk->dport) {
231				if(sk->dport != sport)
232					continue;
233				score++;
234			}
235			if(sk->bound_dev_if) {
236				if(sk->bound_dev_if != dif)
237					continue;
238				score++;
239			}
240			if(score == 4) {
241				result = sk;
242				break;
243			} else if(score > badness) {
244				result = sk;
245				badness = score;
246			}
247		}
248	}
249	return result;
250}
251
252__inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
253{
254	struct sock *sk;
255
256	read_lock(&udp_hash_lock);
257	sk = udp_v4_lookup_longway(saddr, sport, daddr, dport, dif);
258	if (sk)
259		sock_hold(sk);
260	read_unlock(&udp_hash_lock);
261	return sk;
262}
263
264static inline struct sock *udp_v4_mcast_next(struct sock *sk,
265					     u16 loc_port, u32 loc_addr,
266					     u16 rmt_port, u32 rmt_addr,
267					     int dif)
268{
269	struct sock *s = sk;
270	unsigned short hnum = ntohs(loc_port);
271	for(; s; s = s->next) {
272		if ((s->num != hnum)					||
273		    (s->daddr && s->daddr!=rmt_addr)			||
274		    (s->dport != rmt_port && s->dport != 0)			||
275		    (s->rcv_saddr  && s->rcv_saddr != loc_addr)		||
276		    (s->bound_dev_if && s->bound_dev_if != dif))
277			continue;
278		break;
279  	}
280  	return s;
281}
282
283/*
284 * This routine is called by the ICMP module when it gets some
285 * sort of error condition.  If err < 0 then the socket should
286 * be closed and the error returned to the user.  If err > 0
287 * it's just the icmp type << 8 | icmp code.
288 * Header points to the ip header of the error packet. We move
289 * on past this. Then (as it used to claim before adjustment)
290 * header points to the first 8 bytes of the udp header.  We need
291 * to find the appropriate port.
292 */
293
/*
 * udp_err - handle an ICMP error that quotes one of our UDP packets.
 * @skb:  the ICMP packet; skb->data points at the quoted IP header.
 * @info: extra data from the ICMP layer (e.g. MTU for FRAG_NEEDED).
 *
 * Maps the ICMP type/code to an errno, queues it via the IP error
 * queue when IP_RECVERR is set, and reports it to the socket unless
 * it is a soft error on an unconnected socket.
 */
void udp_err(struct sk_buff *skb, u32 info)
{
	struct iphdr *iph = (struct iphdr*)skb->data;
	/* Quoted UDP header sits right after the quoted IP header. */
	struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
	int type = skb->h.icmph->type;
	int code = skb->h.icmph->code;
	struct sock *sk;
	int harderr;
	int err;

	/* Quoted daddr/dest are OUR local addr/port, saddr/source the peer. */
	sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex);
	if (sk == NULL) {
		ICMP_INC_STATS_BH(IcmpInErrors);
    	  	return;	/* No socket for error */
	}

	err = 0;
	harderr = 0;

	switch (type) {
	default:
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	case ICMP_SOURCE_QUENCH:
		goto out;	/* ignored: nothing useful to report */
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		harderr = 1;
		break;
	case ICMP_DEST_UNREACH:
		if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
			if (sk->protinfo.af_inet.pmtudisc != IP_PMTUDISC_DONT) {
				err = EMSGSIZE;
				harderr = 1;
				break;
			}
			goto out;	/* PMTU discovery off: ignore */
		}
		err = EHOSTUNREACH;
		if (code <= NR_ICMP_UNREACH) {
			harderr = icmp_err_convert[code].fatal;
			err = icmp_err_convert[code].errno;
		}
		break;
	}

	/*
	 *      RFC1122: OK.  Passes ICMP errors back to application, as per
	 *	4.1.3.3.
	 */
	if (!sk->protinfo.af_inet.recverr) {
		/* Without IP_RECVERR, report only hard errors on a
		 * connected socket; drop everything else. */
		if (!harderr || sk->state != TCP_ESTABLISHED)
			goto out;
	} else {
		ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
	}
	sk->err = err;
	sk->error_report(sk);
out:
	sock_put(sk);	/* drop the reference udp_v4_lookup() took */
}
356
357
358static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr, unsigned long base)
359{
360	return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base));
361}
362
/*
 * Scratch state passed to the getfrag callbacks by udp_sendmsg():
 * a prebuilt UDP header plus everything needed to copy and checksum
 * the user payload fragment by fragment.
 */
struct udpfakehdr
{
	struct udphdr uh;	/* header template written into fragment 0 */
	u32 saddr;		/* source address for the pseudo-header */
	u32 daddr;		/* destination address for the pseudo-header */
	struct iovec *iov;	/* user payload being copied */
	u32 wcheck;		/* running partial checksum */
};
371
372/*
373 *	Copy and checksum a UDP packet from user space into a buffer.
374 */
375
/*
 * ip_build_xmit() callback: copy @fraglen bytes of a UDP packet into
 * @to while accumulating the checksum.  @offset is the offset within
 * the UDP packet (0 means this fragment carries the UDP header).
 * Returns 0 on success, -EFAULT if the user copy faults.
 */
static int udp_getfrag(const void *p, char * to, unsigned int offset, unsigned int fraglen)
{
	struct udpfakehdr *ufh = (struct udpfakehdr *)p;
	if (offset==0) {
		/* Head fragment: copy+checksum the payload after the header
		 * slot first, then fold in the header and pseudo-header and
		 * finally write the completed header in front. */
		if (csum_partial_copy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
						   fraglen-sizeof(struct udphdr), &ufh->wcheck))
			return -EFAULT;
 		ufh->wcheck = csum_partial((char *)ufh, sizeof(struct udphdr),
					   ufh->wcheck);
		ufh->uh.check = csum_tcpudp_magic(ufh->saddr, ufh->daddr,
					  ntohs(ufh->uh.len),
					  IPPROTO_UDP, ufh->wcheck);
		/* A computed checksum of 0 must be sent as all-ones;
		 * 0 on the wire means "no checksum" for UDP. */
		if (ufh->uh.check == 0)
			ufh->uh.check = -1;
		memcpy(to, ufh, sizeof(struct udphdr));
		return 0;
	}
	/* Non-head fragment: payload only; iovec offset excludes the header. */
	if (csum_partial_copy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
					   fraglen, &ufh->wcheck))
		return -EFAULT;
	return 0;
}
398
399/*
400 *	Copy a UDP packet from user space into a buffer without checksumming.
401 */
402
403static int udp_getfrag_nosum(const void *p, char * to, unsigned int offset, unsigned int fraglen)
404{
405	struct udpfakehdr *ufh = (struct udpfakehdr *)p;
406
407	if (offset==0) {
408		memcpy(to, ufh, sizeof(struct udphdr));
409		return memcpy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
410					   fraglen-sizeof(struct udphdr));
411	}
412	return memcpy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
413				   fraglen);
414}
415
/*
 * udp_sendmsg - transmit one UDP datagram.
 * @sk:  sending socket
 * @msg: user message (optional address, control messages, payload iovec)
 * @len: payload length in bytes
 *
 * Resolves the destination (explicit sendto address or the connected
 * peer), applies IP options / multicast settings, routes the packet
 * (reusing the cached route on the connected fast path) and hands the
 * payload to ip_build_xmit() with the appropriate getfrag callback.
 * Returns @len on success or a negative errno.
 */
int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
{
	int ulen = len + sizeof(struct udphdr);
	struct ipcm_cookie ipc;
	struct udpfakehdr ufh;
	struct rtable *rt = NULL;
	int free = 0;		/* set when ipc.opt was allocated by ip_cmsg_send */
	int connected = 0;	/* eligible for the cached-route fast path */
	u32 daddr;
	u8  tos;
	int err;

	/* This check is ONLY to check for arithmetic overflow
	   on integer(!) len. Not more! Real check will be made
	   in ip_build_xmit --ANK

	   BTW socket.c -> af_*.c -> ... make multiple
	   invalid conversions size_t -> int. We MUST repair it f.e.
	   by replacing all of them with size_t and revise all
	   the places sort of len += sizeof(struct iphdr)
	   If len was ULONG_MAX-10 it would be cathastrophe  --ANK
	 */

	if (len < 0 || len > 0xFFFF)
		return -EMSGSIZE;

	/*
	 *	Check the flags.
	 */

	if (msg->msg_flags&MSG_OOB)	/* Mirror BSD error message compatibility */
		return -EOPNOTSUPP;

	/*
	 *	Get and verify the address.
	 */

	if (msg->msg_name) {
		struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
		if (msg->msg_namelen < sizeof(*usin))
			return -EINVAL;
		if (usin->sin_family != AF_INET) {
			if (usin->sin_family != AF_UNSPEC)
				return -EINVAL;
		}

		ufh.daddr = usin->sin_addr.s_addr;
		ufh.uh.dest = usin->sin_port;
		if (ufh.uh.dest == 0)
			return -EINVAL;
	} else {
		/* No address given: must be a connected socket. */
		if (sk->state != TCP_ESTABLISHED)
			return -ENOTCONN;
		ufh.daddr = sk->daddr;
		ufh.uh.dest = sk->dport;
		/* Open fast path for connected socket.
		   Route will not be used, if at least one option is set.
		 */
		connected = 1;
  	}
	ipc.addr = sk->saddr;
	ufh.uh.source = sk->sport;

	ipc.opt = NULL;
	ipc.oif = sk->bound_dev_if;
	if (msg->msg_controllen) {
		/* Per-packet control messages (PKTINFO etc.) override
		 * socket defaults and disable the fast path. */
		err = ip_cmsg_send(msg, &ipc);
		if (err)
			return err;
		if (ipc.opt)
			free = 1;
		connected = 0;
	}
	if (!ipc.opt)
		ipc.opt = sk->protinfo.af_inet.opt;

	ufh.saddr = ipc.addr;
	ipc.addr = daddr = ufh.daddr;

	if (ipc.opt && ipc.opt->srr) {
		/* Source routing: route to the first hop, not the final
		 * destination. */
		if (!daddr)
			return -EINVAL;
		daddr = ipc.opt->faddr;
		connected = 0;
	}
	tos = RT_TOS(sk->protinfo.af_inet.tos);
	if (sk->localroute || (msg->msg_flags&MSG_DONTROUTE) ||
	    (ipc.opt && ipc.opt->is_strictroute)) {
		tos |= RTO_ONLINK;
		connected = 0;
	}

	if (MULTICAST(daddr)) {
		/* Fall back to the socket's multicast device/source. */
		if (!ipc.oif)
			ipc.oif = sk->protinfo.af_inet.mc_index;
		if (!ufh.saddr)
			ufh.saddr = sk->protinfo.af_inet.mc_addr;
		connected = 0;
	}

	if (connected)
		rt = (struct rtable*)sk_dst_check(sk, 0);

	if (rt == NULL) {
		err = ip_route_output(&rt, daddr, ufh.saddr, tos, ipc.oif);
		if (err)
			goto out;

		/* Broadcasting requires SO_BROADCAST. */
		err = -EACCES;
		if (rt->rt_flags&RTCF_BROADCAST && !sk->broadcast)
			goto out;
		if (connected)
			sk_dst_set(sk, dst_clone(&rt->u.dst));
	}

	if (msg->msg_flags&MSG_CONFIRM)
		goto do_confirm;
back_from_confirm:

	ufh.saddr = rt->rt_src;
	if (!ipc.addr)
		ufh.daddr = ipc.addr = rt->rt_dst;
	ufh.uh.len = htons(ulen);
	ufh.uh.check = 0;
	ufh.iov = msg->msg_iov;
	ufh.wcheck = 0;

	/* RFC1122: OK.  Provides the checksumming facility (MUST) as per */
	/* 4.1.3.4. It's configurable by the application via setsockopt() */
	/* (MAY) and it defaults to on (MUST). */

	err = ip_build_xmit(sk,
			    (sk->no_check == UDP_CSUM_NOXMIT ?
			     udp_getfrag_nosum :
			     udp_getfrag),
			    &ufh, ulen, &ipc, rt, msg->msg_flags);

out:
	ip_rt_put(rt);
	if (free)
		kfree(ipc.opt);
	if (!err) {
		UDP_INC_STATS_USER(UdpOutDatagrams);
		return len;
	}
	return err;

do_confirm:
	/* MSG_CONFIRM: refresh the neighbour entry; with MSG_PROBE and an
	 * empty payload nothing is actually transmitted. */
	dst_confirm(&rt->u.dst);
	if (!(msg->msg_flags&MSG_PROBE) || len)
		goto back_from_confirm;
	err = 0;
	goto out;
}
570
571/*
572 *	IOCTL requests applicable to the UDP protocol
573 */
574
575int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
576{
577	switch(cmd)
578	{
579		case SIOCOUTQ:
580		{
581			int amount = atomic_read(&sk->wmem_alloc);
582			return put_user(amount, (int *)arg);
583		}
584
585		case SIOCINQ:
586		{
587			struct sk_buff *skb;
588			unsigned long amount;
589
590			amount = 0;
591			spin_lock_irq(&sk->receive_queue.lock);
592			skb = skb_peek(&sk->receive_queue);
593			if (skb != NULL) {
594				/*
595				 * We will only return the amount
596				 * of this packet since that is all
597				 * that will be read.
598				 */
599				amount = skb->len - sizeof(struct udphdr);
600			}
601			spin_unlock_irq(&sk->receive_queue.lock);
602			return put_user(amount, (int *)arg);
603		}
604
605		default:
606			return -ENOIOCTLCMD;
607	}
608	return(0);
609}
610
611static __inline__ int __udp_checksum_complete(struct sk_buff *skb)
612{
613	return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
614}
615
616static __inline__ int udp_checksum_complete(struct sk_buff *skb)
617{
618	return skb->ip_summed != CHECKSUM_UNNECESSARY &&
619		__udp_checksum_complete(skb);
620}
621
622/*
623 * 	This should be easy, if there is something there we
624 * 	return it, otherwise we block.
625 */
626
/*
 * udp_recvmsg - receive one datagram (or part of one) on a UDP socket.
 * @sk:       the socket
 * @msg:      destination message (iovec, optional address buffer)
 * @len:      user buffer length
 * @noblock:  non-blocking receive
 * @flags:    MSG_PEEK / MSG_TRUNC / MSG_ERRQUEUE etc.
 * @addr_len: out: size of the source address written to msg_name
 *
 * Checksum verification is deferred until copy time: if the copy is
 * complete we checksum-and-copy in one pass; a failure drops the
 * datagram and returns -EAGAIN.  Returns the number of payload bytes
 * copied, or a negative errno.
 */
int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len,
		int noblock, int flags, int *addr_len)
{
  	struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
  	struct sk_buff *skb;
  	int copied, err;

	/*
	 *	Check any passed addresses
	 */
	if (addr_len)
		*addr_len=sizeof(*sin);

	if (flags & MSG_ERRQUEUE)
		return ip_recv_error(sk, msg, len);

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb)
		goto out;

	/* Payload size; flag truncation if the user buffer is smaller. */
  	copied = skb->len - sizeof(struct udphdr);
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
		/* Already verified (or no checksum): plain copy. */
		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
					      copied);
	} else if (msg->msg_flags&MSG_TRUNC) {
		/* Partial copy can't validate as a side effect, so verify
		 * the whole datagram first. */
		if (__udp_checksum_complete(skb))
			goto csum_copy_err;
		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
					      copied);
	} else {
		/* Full copy: checksum while copying; -EINVAL means bad sum. */
		err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);

		if (err == -EINVAL)
			goto csum_copy_err;
	}

	if (err)
		goto out_free;

	sock_recv_timestamp(msg, sk, skb);

	/* Copy the address. */
	if (sin)
	{
		sin->sin_family = AF_INET;
		sin->sin_port = skb->h.uh->source;
		sin->sin_addr.s_addr = skb->nh.iph->saddr;
		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
  	}
	if (sk->protinfo.af_inet.cmsg_flags)
		ip_cmsg_recv(msg, skb);
	err = copied;

out_free:
  	skb_free_datagram(sk, skb);
out:
  	return err;

csum_copy_err:
	UDP_INC_STATS_BH(UdpInErrors);

	/* Clear queue. */
	if (flags&MSG_PEEK) {
		/* A peeked skb is still on the receive queue; unlink it
		 * (if nobody raced us) so the bad datagram is not seen
		 * again, then drop that queue reference. */
		int clear = 0;
		spin_lock_irq(&sk->receive_queue.lock);
		if (skb == skb_peek(&sk->receive_queue)) {
			__skb_unlink(skb, &sk->receive_queue);
			clear = 1;
		}
		spin_unlock_irq(&sk->receive_queue.lock);
		if (clear)
			kfree_skb(skb);
	}

	skb_free_datagram(sk, skb);

	return -EAGAIN;
}
710
/*
 * udp_connect - associate a UDP socket with a fixed peer.
 * @sk:       the socket
 * @uaddr:    peer address (must be AF_INET)
 * @addr_len: length of @uaddr
 *
 * Routes to the peer, records the 4-tuple on the socket, caches the
 * route, and moves the socket to TCP_ESTABLISHED so send/recv without
 * an address work.  Returns 0 or a negative errno.
 */
int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
	struct rtable *rt;
	u32 saddr;
	int oif;
	int err;


	if (addr_len < sizeof(*usin))
	  	return -EINVAL;

	if (usin->sin_family != AF_INET)
	  	return -EAFNOSUPPORT;

	sk_dst_reset(sk);	/* invalidate any previously cached route */

	oif = sk->bound_dev_if;
	saddr = sk->saddr;
	if (MULTICAST(usin->sin_addr.s_addr)) {
		/* Connecting to a multicast group: prefer the configured
		 * multicast device and source address. */
		if (!oif)
			oif = sk->protinfo.af_inet.mc_index;
		if (!saddr)
			saddr = sk->protinfo.af_inet.mc_addr;
	}
	err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr,
			       RT_CONN_FLAGS(sk), oif);
	if (err)
		return err;
	/* Connecting to a broadcast address requires SO_BROADCAST. */
	if ((rt->rt_flags&RTCF_BROADCAST) && !sk->broadcast) {
		ip_rt_put(rt);
		return -EACCES;
	}
  	if(!sk->saddr)
	  	sk->saddr = rt->rt_src;		/* Update source address */
	if(!sk->rcv_saddr)
		sk->rcv_saddr = rt->rt_src;
	sk->daddr = rt->rt_dst;
	sk->dport = usin->sin_port;
	sk->state = TCP_ESTABLISHED;
	sk->protinfo.af_inet.id = jiffies;	/* seed the IP ident counter */

	sk_dst_set(sk, &rt->u.dst);
	return(0);
}
756
/*
 * udp_disconnect - break a UDP socket's peer association (1003.1g).
 * Clears the peer addressing, and additionally releases the local
 * address/port bindings unless the application locked them with an
 * explicit bind().  Returns 0.
 */
int udp_disconnect(struct sock *sk, int flags)
{
	/*
	 *	1003.1g - break association.
	 */

	sk->state = TCP_CLOSE;
	sk->daddr = 0;
	sk->dport = 0;
	sk->bound_dev_if = 0;
	/* Only forget the local address if it wasn't explicitly bound. */
	if (!(sk->userlocks&SOCK_BINDADDR_LOCK)) {
		sk->rcv_saddr = 0;
		sk->saddr = 0;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		memset(&sk->net_pinfo.af_inet6.saddr, 0, 16);
		memset(&sk->net_pinfo.af_inet6.rcv_saddr, 0, 16);
#endif
	}
	/* Only release the local port if it wasn't explicitly bound. */
	if (!(sk->userlocks&SOCK_BINDPORT_LOCK)) {
		sk->prot->unhash(sk);
		sk->sport = 0;
	}
	sk_dst_reset(sk);	/* drop the cached route to the old peer */
	return 0;
}
782
/*
 * Close a UDP socket.  UDP has no shutdown handshake, so @timeout is
 * unused and the inet socket can be released immediately.
 */
static void udp_close(struct sock *sk, long timeout)
{
	inet_sock_release(sk);
}
787
/*
 * udp_queue_rcv_skb - deliver one datagram to a specific socket.
 *
 * Verifies the checksum early when a socket filter is attached, gives
 * encapsulation hooks (UDP encap, ESP-in-UDP for IPsec NAT traversal)
 * a chance to consume the packet, and otherwise charges it to the
 * socket's receive queue.  Returns 0 on delivery, negative if the
 * packet was dropped (or should be resubmitted, for encap <0 returns).
 */
static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
{
#if 1
	struct udp_opt *up =  &(sk->tp_pinfo.af_udp);
#endif
	/*
	 *	Charge it to the socket, dropping if the queue is full.
	 */

#if defined(CONFIG_FILTER)
	/* The filter will look at the payload, so the checksum must be
	 * verified now rather than lazily at recvmsg() time. */
	if (sk->filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
		if (__udp_checksum_complete(skb)) {
			UDP_INC_STATS_BH(UdpInErrors);
			IP_INC_STATS_BH(IpInDiscards);
			/* Undo the InDelivers bump made in udp_rcv(). */
			ip_statistics[smp_processor_id()*2].IpInDelivers--;
			kfree_skb(skb);
			return -1;
		}
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}
#endif

	if (up->encap_type) {
		/*
		 * This is an encapsulation socket so pass the skb to
		 * the socket's udp_encap_rcv() hook. Otherwise, just
		 * fall through and pass this up the UDP socket.
		 * up->encap_rcv() returns the following value:
		 * =0 if skb was successfully passed to the encap
		 *    handler or was discarded by it.
		 * >0 if skb should be passed on to UDP.
		 * <0 if skb should be resubmitted as proto -N
		 */

		/* if we're overly short, let UDP handle it */
		if (skb->len > sizeof(struct udphdr) &&
		    up->encap_rcv != NULL) {
			int ret;

			ret = (*up->encap_rcv)(sk, skb);
			if (ret <= 0) {
				UDP_INC_STATS_BH(UdpInDatagrams);
				return -ret;
			}
		}

		/* FALLTHROUGH -- it's a UDP Packet */
	}

#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
	if (up->esp_in_udp) {
		/*
		 * Set skb->sk and xmit packet to ipsec_rcv.
		 *
		 * If ret != 0, ipsec_rcv refused the packet (not ESPinUDP),
		 * restore skb->sk and fall back to sock_queue_rcv_skb
		 */
		struct inet_protocol *esp = NULL;

#if defined(CONFIG_IPSEC) && !defined(CONFIG_IPSEC_MODULE)
               /* optomize only when we know it is statically linked */
		extern struct inet_protocol esp_protocol;
		esp = &esp_protocol;
#else
		/* Walk the registered protocol chain to find ESP. */
		for (esp = (struct inet_protocol *)inet_protos[IPPROTO_ESP & (MAX_INET_PROTOS - 1)];
			(esp) && (esp->protocol != IPPROTO_ESP);
			esp = esp->next);
#endif

		if (esp && esp->handler) {
			struct sock *sav_sk = skb->sk;
			skb->sk = sk;
			if (esp->handler(skb) == 0) {
				skb->sk = sav_sk;
				/*not sure we might count ESPinUDP as UDP...*/
				UDP_INC_STATS_BH(UdpInDatagrams);
				return 0;
			}
			skb->sk = sav_sk;
		}
	}
#endif

	if (sock_queue_rcv_skb(sk,skb)<0) {
		UDP_INC_STATS_BH(UdpInErrors);
		IP_INC_STATS_BH(IpInDiscards);
		/* Undo the InDelivers bump made in udp_rcv(). */
		ip_statistics[smp_processor_id()*2].IpInDelivers--;
		kfree_skb(skb);
		return -1;
	}
	UDP_INC_STATS_BH(UdpInDatagrams);
	return 0;
}
881
882/*
883 *	Multicasts and broadcasts go to each listener.
884 *
885 *	Note: called only from the BH handler context,
886 *	so we don't need to lock the hashes.
887 */
/*
 * Deliver a multicast/broadcast datagram to every matching socket on
 * the port's hash chain.  The skb is cloned for each listener except
 * the last, which consumes the original; if nobody matches, the skb
 * is freed here.  Always returns 0.
 */
static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
				 u32 saddr, u32 daddr)
{
	struct sock *sk;
	int dif;

	read_lock(&udp_hash_lock);
	sk = udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)];
	dif = skb->dev->ifindex;
	sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
	if (sk) {
		struct sock *sknext = NULL;

		do {
			struct sk_buff *skb1 = skb;

			/* Look ahead: clone only if another listener follows,
			 * so the last one gets the original skb. */
			sknext = udp_v4_mcast_next(sk->next, uh->dest, daddr,
						   uh->source, saddr, dif);
			if(sknext)
				skb1 = skb_clone(skb, GFP_ATOMIC);

			/* skb1 may be NULL if the clone failed; skip then. */
			if(skb1)
				udp_queue_rcv_skb(sk, skb1);
			sk = sknext;
		} while(sknext);
	} else
		kfree_skb(skb);
	read_unlock(&udp_hash_lock);
	return 0;
}
918
919/* Initialize UDP checksum. If exited with zero value (success),
920 * CHECKSUM_UNNECESSARY means, that no more checks are required.
921 * Otherwise, csum completion requires chacksumming packet body,
922 * including udp header and folding it to skb->csum.
923 */
/*
 * Prepare the receive checksum state for a UDP datagram.
 * On return with 0: CHECKSUM_UNNECESSARY means no further checks are
 * needed (no checksum on the wire, or hardware verified it); otherwise
 * skb->csum holds the pseudo-header sum and the packet body must still
 * be checksummed lazily (see udp_checksum_complete()).
 */
static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
			     unsigned short ulen, u32 saddr, u32 daddr)
{
	if (uh->check == 0) {
		/* RFC 768: zero checksum means the sender didn't compute one. */
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else if (skb->ip_summed == CHECKSUM_HW) {
		/* Hardware summed the packet; just fold in the pseudo-header. */
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		if (!udp_check(uh, ulen, saddr, daddr, skb->csum))
			return 0;
		NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp v4 hw csum failure.\n"));
		/* Hardware sum disagreed: fall back to a software check. */
		skb->ip_summed = CHECKSUM_NONE;
	}
	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
		skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
	/* Probably, we should checksum udp header (it should be in cache
	 * in any case) and data in tiny packets (< rx copybreak).
	 */
	return 0;
}
943
944/*
945 *	All we need to do is get the socket, and then do a checksum.
946 */
947
/*
 * udp_rcv - main UDP input routine, called from the IP layer.
 *
 * Validates the header and advertised length, initializes checksum
 * state, then dispatches: multicast/broadcast to every listener,
 * unicast to the matching socket, and otherwise answers with an ICMP
 * port-unreachable (only if the checksum is good).  Always returns 0;
 * the skb is consumed on every path.
 */
int udp_rcv(struct sk_buff *skb)
{
  	struct sock *sk;
  	struct udphdr *uh;
	unsigned short ulen;
	struct rtable *rt = (struct rtable*)skb->dst;
	u32 saddr = skb->nh.iph->saddr;
	u32 daddr = skb->nh.iph->daddr;
	int len = skb->len;

  	IP_INC_STATS_BH(IpInDelivers);

	/*
	 *	Validate the packet and the UDP length.
	 */
	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
		goto no_header;

  	uh = skb->h.uh;

	ulen = ntohs(uh->len);

	/* The advertised UDP length must fit in the skb and cover at
	 * least the header. */
	if (ulen > len || ulen < sizeof(*uh))
		goto short_packet;

	/* Trim any link-layer padding beyond the UDP length. */
	if (pskb_trim(skb, ulen))
		goto short_packet;

	if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0)
		goto csum_error;

	if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
		return udp_v4_mcast_deliver(skb, uh, saddr, daddr);

	sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex);

	if (sk != NULL) {
		udp_queue_rcv_skb(sk, skb);
		sock_put(sk);	/* release udp_v4_lookup()'s reference */
		return 0;
	}

	/* No socket. Drop packet silently, if checksum is wrong */
	if (udp_checksum_complete(skb))
		goto csum_error;

	UDP_INC_STATS_BH(UdpNoPorts);
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

	/*
	 * Hmm.  We got an UDP packet to a port to which we
	 * don't wanna listen.  Ignore it.
	 */
	kfree_skb(skb);
	return(0);

short_packet:
	NETDEBUG(if (net_ratelimit())
		 printk(KERN_DEBUG "UDP: short packet: %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
			NIPQUAD(saddr),
			ntohs(uh->source),
			ulen,
			len,
			NIPQUAD(daddr),
			ntohs(uh->dest)));
no_header:
	UDP_INC_STATS_BH(UdpInErrors);
	kfree_skb(skb);
	return(0);

csum_error:
	/*
	 * RFC1122: OK.  Discards the bad packet silently (as far as
	 * the network is concerned, anyway) as per 4.1.3.4 (MUST).
	 */
	NETDEBUG(if (net_ratelimit())
		 printk(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
			NIPQUAD(saddr),
			ntohs(uh->source),
			NIPQUAD(daddr),
			ntohs(uh->dest),
			ulen));
	UDP_INC_STATS_BH(UdpInErrors);
	kfree_skb(skb);
	return(0);
}
1034
1035static void get_udp_sock(struct sock *sp, char *tmpbuf, int i)
1036{
1037	unsigned int dest, src;
1038	__u16 destp, srcp;
1039
1040	dest  = sp->daddr;
1041	src   = sp->rcv_saddr;
1042	destp = ntohs(sp->dport);
1043	srcp  = ntohs(sp->sport);
1044	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
1045		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p",
1046		i, src, srcp, dest, destp, sp->state,
1047		atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
1048		0, 0L, 0,
1049		sock_i_uid(sp), 0,
1050		sock_i_ino(sp),
1051		atomic_read(&sp->refcnt), sp);
1052}
1053
/*
 * udp_get_info - /proc/net/udp read handler (old-style procfs).
 * @buffer: output buffer
 * @start:  out: where the requested window begins inside @buffer
 * @offset: byte offset into the virtual file the caller wants
 * @length: size of the caller's window
 *
 * Every row (and the header) is padded to exactly 128 bytes, which is
 * what makes the offset arithmetic below work.  Returns the number of
 * bytes available from *start.
 */
int udp_get_info(char *buffer, char **start, off_t offset, int length)
{
	int len = 0, num = 0, i;
	off_t pos = 0;
	off_t begin;
	char tmpbuf[129];

	if (offset < 128)
		len += sprintf(buffer, "%-127s\n",
			       "  sl  local_address rem_address   st tx_queue "
			       "rx_queue tr tm->when retrnsmt   uid  timeout inode");
	pos = 128;
	read_lock(&udp_hash_lock);
	for (i = 0; i < UDP_HTABLE_SIZE; i++) {
		struct sock *sk;

		for (sk = udp_hash[i]; sk; sk = sk->next, num++) {
			if (sk->family != PF_INET)
				continue;
			pos += 128;
			/* Skip rows entirely before the requested offset. */
			if (pos <= offset)
				continue;
			get_udp_sock(sk, tmpbuf, i);
			len += sprintf(buffer+len, "%-127s\n", tmpbuf);
			if(len >= length)
				goto out;
		}
	}
out:
	read_unlock(&udp_hash_lock);
	/* Trim the partially-overlapping first row and clamp to @length. */
	begin = len - (pos - offset);
	*start = buffer + begin;
	len -= begin;
	if(len > length)
		len = length;
	if (len < 0)
		len = 0;
	return len;
}
1093
/*
 * udp_setsockopt - SOL_UDP socket options.
 * Non-SOL_UDP levels are forwarded to ip_setsockopt().  The only
 * UDP-level option here is UDP_ESPINUDP (IPsec NAT traversal), and
 * only when CONFIG_IPSEC_NAT_TRAVERSAL is enabled; everything else
 * yields -ENOPROTOOPT.
 */
static int udp_setsockopt(struct sock *sk, int level, int optname,
	char *optval, int optlen)
{
	struct udp_opt *tp = &(sk->tp_pinfo.af_udp);
	int val;
	int err = 0;

	if (level != SOL_UDP)
		return ip_setsockopt(sk, level, optname, optval, optlen);

	if(optlen<sizeof(int))
		return -EINVAL;

	if (get_user(val, (int *)optval))
		return -EFAULT;

	lock_sock(sk);

	switch(optname) {
#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
#ifndef UDP_ESPINUDP
#define UDP_ESPINUDP 100
#endif
		case UDP_ESPINUDP:
			/* Mark this socket as carrying ESP-in-UDP traffic. */
			tp->esp_in_udp = val;
			break;
#endif
		default:
			err = -ENOPROTOOPT;
			break;
	}

	release_sock(sk);
	return err;
}
1129
/* UDP's protocol operations, hooked into the generic inet socket layer.
 * getsockopt goes straight to the IP layer since UDP adds no readable
 * options of its own; hash is a BUG() trap because insertion happens
 * only via get_port. */
struct proto udp_prot = {
 	name:		"UDP",
	close:		udp_close,
	connect:	udp_connect,
	disconnect:	udp_disconnect,
	ioctl:		udp_ioctl,
	setsockopt:	udp_setsockopt,
	getsockopt:	ip_getsockopt,
	sendmsg:	udp_sendmsg,
	recvmsg:	udp_recvmsg,
	backlog_rcv:	udp_queue_rcv_skb,
	hash:		udp_v4_hash,
	unhash:		udp_v4_unhash,
	get_port:	udp_v4_get_port,
};
1145