Deleted Added
sdiff udiff text old ( 157374 ) new ( 157927 )
full compact
1/*-
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95
30 * $FreeBSD: head/sys/netinet/udp_usrreq.c 157927 2006-04-21 09:25:40Z ps $
31 */
32
33#include "opt_ipfw.h"
34#include "opt_ipsec.h"
35#include "opt_inet6.h"
36#include "opt_mac.h"
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/domain.h>
41#include <sys/eventhandler.h>
42#include <sys/jail.h>
43#include <sys/kernel.h>
44#include <sys/lock.h>
45#include <sys/mac.h>
46#include <sys/malloc.h>
47#include <sys/mbuf.h>
48#include <sys/proc.h>
49#include <sys/protosw.h>
50#include <sys/signalvar.h>
51#include <sys/socket.h>
52#include <sys/socketvar.h>
53#include <sys/sx.h>
54#include <sys/sysctl.h>
55#include <sys/syslog.h>
56
57#include <vm/uma.h>
58
59#include <net/if.h>
60#include <net/route.h>
61
62#include <netinet/in.h>
63#include <netinet/in_systm.h>
64#include <netinet/in_pcb.h>
65#include <netinet/in_var.h>
66#include <netinet/ip.h>
67#ifdef INET6
68#include <netinet/ip6.h>
69#endif
70#include <netinet/ip_icmp.h>
71#include <netinet/icmp_var.h>
72#include <netinet/ip_var.h>
73#include <netinet/ip_options.h>
74#ifdef INET6
75#include <netinet6/ip6_var.h>
76#endif
77#include <netinet/udp.h>
78#include <netinet/udp_var.h>
79
80#ifdef FAST_IPSEC
81#include <netipsec/ipsec.h>
82#endif /*FAST_IPSEC*/
83
84#ifdef IPSEC
85#include <netinet6/ipsec.h>
86#endif /*IPSEC*/
87
88#include <machine/in_cksum.h>
89
90/*
91 * UDP protocol implementation.
92 * Per RFC 768, August, 1980.
93 */
/*
 * udpcksum: when non-zero, udp_output() fills in a pseudo-header checksum
 * and requests UDP checksumming for outgoing datagrams; when zero the
 * checksum field is sent as 0.  Defaults to on unless COMPAT_42 is defined.
 */
#ifndef COMPAT_42
static int	udpcksum = 1;
#else
static int	udpcksum = 0;		/* XXX */
#endif
SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW,
    &udpcksum, 0, "");

/*
 * log_in_vain: when non-zero, udp_input() logs every unicast datagram for
 * which no matching PCB is found.
 */
int	log_in_vain = 0;
SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW,
    &log_in_vain, 0, "Log all incoming UDP packets");

/*
 * blackhole: when non-zero, udp_input() silently drops datagrams that match
 * no PCB instead of answering with an ICMP port unreachable.
 */
static int	blackhole = 0;
SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW,
    &blackhole, 0, "Do not send port unreachables for refused connects");

/*
 * strict_mcast_mship: when non-zero, udp_input() delivers a multicast or
 * broadcast datagram to a socket that has multicast options only if one of
 * its memberships matches the destination address and receiving interface.
 */
static int	strict_mcast_mship = 0;
SYSCTL_INT(_net_inet_udp, OID_AUTO, strict_mcast_mship, CTLFLAG_RW,
    &strict_mcast_mship, 0, "Only send multicast to member sockets");

/* List of all UDP PCBs and the pcbinfo (hashes, zone, lock) that owns it. */
struct	inpcbhead udb;		/* from udp_var.h */
#define	udb6	udb  /* for KAME src sync over BSD*'s */
struct	inpcbinfo udbinfo;

/* Size passed to hashinit() for both PCB hash tables in udp_init(). */
#ifndef UDBHASHSIZE
#define UDBHASHSIZE 16
#endif

/* Protocol-wide counters, exported (read/write) through sysctl. */
struct	udpstat udpstat;	/* from udp_var.h */
SYSCTL_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RW,
    &udpstat, udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)");
125
/*
 * Forward declarations for file-local routines that are referenced before
 * their definitions.
 */

/* Hand one datagram to a single socket; used by udp_input(). */
static void udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n,
		int off, struct sockaddr_in *udp_in);

/* Socket detach hook (see udp_usrreqs). */
static void udp_detach(struct socket *so);
/* Build and send one datagram; called from udp_send(). */
static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
		struct mbuf *, struct thread *);
132
/*
 * Handler for the maxsockets_change event (registered in udp_init()):
 * re-applies the current value of maxsockets as the item limit of the UDP
 * PCB zone.  The tag argument is not used.
 */
static void
udp_zone_change(void *tag)
{

	uma_zone_set_max(udbinfo.ipi_zone, maxsockets);
}
139
140void
141udp_init()
142{
143 INP_INFO_LOCK_INIT(&udbinfo, "udp");
144 LIST_INIT(&udb);
145 udbinfo.listhead = &udb;
146 udbinfo.hashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.hashmask);
147 udbinfo.porthashbase = hashinit(UDBHASHSIZE, M_PCB,
148 &udbinfo.porthashmask);
149 udbinfo.ipi_zone = uma_zcreate("udpcb", sizeof(struct inpcb), NULL,
150 NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
151 uma_zone_set_max(udbinfo.ipi_zone, maxsockets);
152 EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL,
153 EVENTHANDLER_PRI_ANY);
154}
155
156void
157udp_input(m, off)
158 register struct mbuf *m;
159 int off;
160{
161 int iphlen = off;
162 register struct ip *ip;
163 register struct udphdr *uh;
164 register struct inpcb *inp;
165 int len;
166 struct ip save_ip;
167 struct sockaddr_in udp_in;
168#ifdef IPFIREWALL_FORWARD
169 struct m_tag *fwd_tag;
170#endif
171
172 udpstat.udps_ipackets++;
173
174 /*
175 * Strip IP options, if any; should skip this,
176 * make available to user, and use on returned packets,
177 * but we don't yet have a way to check the checksum
178 * with options still present.
179 */
180 if (iphlen > sizeof (struct ip)) {
181 ip_stripoptions(m, (struct mbuf *)0);
182 iphlen = sizeof(struct ip);
183 }
184
185 /*
186 * Get IP and UDP header together in first mbuf.
187 */
188 ip = mtod(m, struct ip *);
189 if (m->m_len < iphlen + sizeof(struct udphdr)) {
190 if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == 0) {
191 udpstat.udps_hdrops++;
192 return;
193 }
194 ip = mtod(m, struct ip *);
195 }
196 uh = (struct udphdr *)((caddr_t)ip + iphlen);
197
198 /* destination port of 0 is illegal, based on RFC768. */
199 if (uh->uh_dport == 0)
200 goto badunlocked;
201
202 /*
203 * Construct sockaddr format source address.
204 * Stuff source address and datagram in user buffer.
205 */
206 bzero(&udp_in, sizeof(udp_in));
207 udp_in.sin_len = sizeof(udp_in);
208 udp_in.sin_family = AF_INET;
209 udp_in.sin_port = uh->uh_sport;
210 udp_in.sin_addr = ip->ip_src;
211
212 /*
213 * Make mbuf data length reflect UDP length.
214 * If not enough data to reflect UDP length, drop.
215 */
216 len = ntohs((u_short)uh->uh_ulen);
217 if (ip->ip_len != len) {
218 if (len > ip->ip_len || len < sizeof(struct udphdr)) {
219 udpstat.udps_badlen++;
220 goto badunlocked;
221 }
222 m_adj(m, len - ip->ip_len);
223 /* ip->ip_len = len; */
224 }
225 /*
226 * Save a copy of the IP header in case we want restore it
227 * for sending an ICMP error message in response.
228 */
229 if (!blackhole)
230 save_ip = *ip;
231
232 /*
233 * Checksum extended UDP header and data.
234 */
235 if (uh->uh_sum) {
236 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
237 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
238 uh->uh_sum = m->m_pkthdr.csum_data;
239 else
240 uh->uh_sum = in_pseudo(ip->ip_src.s_addr,
241 ip->ip_dst.s_addr, htonl((u_short)len +
242 m->m_pkthdr.csum_data + IPPROTO_UDP));
243 uh->uh_sum ^= 0xffff;
244 } else {
245 char b[9];
246 bcopy(((struct ipovly *)ip)->ih_x1, b, 9);
247 bzero(((struct ipovly *)ip)->ih_x1, 9);
248 ((struct ipovly *)ip)->ih_len = uh->uh_ulen;
249 uh->uh_sum = in_cksum(m, len + sizeof (struct ip));
250 bcopy(b, ((struct ipovly *)ip)->ih_x1, 9);
251 }
252 if (uh->uh_sum) {
253 udpstat.udps_badsum++;
254 m_freem(m);
255 return;
256 }
257 } else
258 udpstat.udps_nosum++;
259
260#ifdef IPFIREWALL_FORWARD
261 /* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */
262 fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
263
264 if (fwd_tag != NULL) {
265 struct sockaddr_in *next_hop;
266
267 /* Do the hack. */
268 next_hop = (struct sockaddr_in *)(fwd_tag + 1);
269 ip->ip_dst = next_hop->sin_addr;
270 uh->uh_dport = ntohs(next_hop->sin_port);
271 /* Remove the tag from the packet. We don't need it anymore. */
272 m_tag_delete(m, fwd_tag);
273 }
274#endif
275
276 INP_INFO_RLOCK(&udbinfo);
277
278 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
279 in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
280 struct inpcb *last;
281 /*
282 * Deliver a multicast or broadcast datagram to *all* sockets
283 * for which the local and remote addresses and ports match
284 * those of the incoming datagram. This allows more than
285 * one process to receive multi/broadcasts on the same port.
286 * (This really ought to be done for unicast datagrams as
287 * well, but that would cause problems with existing
288 * applications that open both address-specific sockets and
289 * a wildcard socket listening to the same port -- they would
290 * end up receiving duplicates of every unicast datagram.
291 * Those applications open the multiple sockets to overcome an
292 * inadequacy of the UDP socket interface, but for backwards
293 * compatibility we avoid the problem here rather than
294 * fixing the interface. Maybe 4.5BSD will remedy this?)
295 */
296
297 /*
298 * Locate pcb(s) for datagram.
299 * (Algorithm copied from raw_intr().)
300 */
301 last = NULL;
302 LIST_FOREACH(inp, &udb, inp_list) {
303 if (inp->inp_lport != uh->uh_dport)
304 continue;
305#ifdef INET6
306 if ((inp->inp_vflag & INP_IPV4) == 0)
307 continue;
308#endif
309 if (inp->inp_laddr.s_addr != INADDR_ANY) {
310 if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
311 continue;
312 }
313 if (inp->inp_faddr.s_addr != INADDR_ANY) {
314 if (inp->inp_faddr.s_addr !=
315 ip->ip_src.s_addr ||
316 inp->inp_fport != uh->uh_sport)
317 continue;
318 }
319 INP_LOCK(inp);
320
321 /*
322 * Check multicast packets to make sure they are only
323 * sent to sockets with multicast memberships for the
324 * packet's destination address and arrival interface
325 */
326#define MSHIP(_inp, n) ((_inp)->inp_moptions->imo_membership[(n)])
327#define NMSHIPS(_inp) ((_inp)->inp_moptions->imo_num_memberships)
328 if (strict_mcast_mship && inp->inp_moptions != NULL) {
329 int mship, foundmship = 0;
330
331 for (mship = 0; mship < NMSHIPS(inp); mship++) {
332 if (MSHIP(inp, mship)->inm_addr.s_addr
333 == ip->ip_dst.s_addr &&
334 MSHIP(inp, mship)->inm_ifp
335 == m->m_pkthdr.rcvif) {
336 foundmship = 1;
337 break;
338 }
339 }
340 if (foundmship == 0) {
341 INP_UNLOCK(inp);
342 continue;
343 }
344 }
345#undef NMSHIPS
346#undef MSHIP
347 if (last != NULL) {
348 struct mbuf *n;
349
350 n = m_copy(m, 0, M_COPYALL);
351 if (n != NULL)
352 udp_append(last, ip, n,
353 iphlen +
354 sizeof(struct udphdr),
355 &udp_in);
356 INP_UNLOCK(last);
357 }
358 last = inp;
359 /*
360 * Don't look for additional matches if this one does
361 * not have either the SO_REUSEPORT or SO_REUSEADDR
362 * socket options set. This heuristic avoids searching
363 * through all pcbs in the common case of a non-shared
364 * port. It * assumes that an application will never
365 * clear these options after setting them.
366 */
367 if ((last->inp_socket->so_options&(SO_REUSEPORT|SO_REUSEADDR)) == 0)
368 break;
369 }
370
371 if (last == NULL) {
372 /*
373 * No matching pcb found; discard datagram.
374 * (No need to send an ICMP Port Unreachable
375 * for a broadcast or multicast datgram.)
376 */
377 udpstat.udps_noportbcast++;
378 goto badheadlocked;
379 }
380 udp_append(last, ip, m, iphlen + sizeof(struct udphdr),
381 &udp_in);
382 INP_UNLOCK(last);
383 INP_INFO_RUNLOCK(&udbinfo);
384 return;
385 }
386 /*
387 * Locate pcb for datagram.
388 */
389 inp = in_pcblookup_hash(&udbinfo, ip->ip_src, uh->uh_sport,
390 ip->ip_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif);
391 if (inp == NULL) {
392 if (log_in_vain) {
393 char buf[4*sizeof "123"];
394
395 strcpy(buf, inet_ntoa(ip->ip_dst));
396 log(LOG_INFO,
397 "Connection attempt to UDP %s:%d from %s:%d\n",
398 buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src),
399 ntohs(uh->uh_sport));
400 }
401 udpstat.udps_noport++;
402 if (m->m_flags & (M_BCAST | M_MCAST)) {
403 udpstat.udps_noportbcast++;
404 goto badheadlocked;
405 }
406 if (blackhole)
407 goto badheadlocked;
408 if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
409 goto badheadlocked;
410 *ip = save_ip;
411 ip->ip_len += iphlen;
412 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
413 INP_INFO_RUNLOCK(&udbinfo);
414 return;
415 }
416 INP_LOCK(inp);
417 /* Check the minimum TTL for socket. */
418 if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl)
419 goto badheadlocked;
420 udp_append(inp, ip, m, iphlen + sizeof(struct udphdr), &udp_in);
421 INP_UNLOCK(inp);
422 INP_INFO_RUNLOCK(&udbinfo);
423 return;
424
425badheadlocked:
426 if (inp)
427 INP_UNLOCK(inp);
428 INP_INFO_RUNLOCK(&udbinfo);
429badunlocked:
430 m_freem(m);
431 return;
432}
433
434/*
435 * Subroutine of udp_input(), which appends the provided mbuf chain to the
436 * passed pcb/socket. The caller must provide a sockaddr_in via udp_in that
437 * contains the source address. If the socket ends up being an IPv6 socket,
438 * udp_append() will convert to a sockaddr_in6 before passing the address
439 * into the socket code.
440 */
441static void
442udp_append(last, ip, n, off, udp_in)
443 struct inpcb *last;
444 struct ip *ip;
445 struct mbuf *n;
446 int off;
447 struct sockaddr_in *udp_in;
448{
449 struct sockaddr *append_sa;
450 struct socket *so;
451 struct mbuf *opts = 0;
452#ifdef INET6
453 struct sockaddr_in6 udp_in6;
454#endif
455
456 INP_LOCK_ASSERT(last);
457
458#if defined(IPSEC) || defined(FAST_IPSEC)
459 /* check AH/ESP integrity. */
460 if (ipsec4_in_reject(n, last)) {
461#ifdef IPSEC
462 ipsecstat.in_polvio++;
463#endif /*IPSEC*/
464 m_freem(n);
465 return;
466 }
467#endif /*IPSEC || FAST_IPSEC*/
468#ifdef MAC
469 if (mac_check_inpcb_deliver(last, n) != 0) {
470 m_freem(n);
471 return;
472 }
473#endif
474 if (last->inp_flags & INP_CONTROLOPTS ||
475 last->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) {
476#ifdef INET6
477 if (last->inp_vflag & INP_IPV6) {
478 int savedflags;
479
480 savedflags = last->inp_flags;
481 last->inp_flags &= ~INP_UNMAPPABLEOPTS;
482 ip6_savecontrol(last, n, &opts);
483 last->inp_flags = savedflags;
484 } else
485#endif
486 ip_savecontrol(last, &opts, ip, n);
487 }
488#ifdef INET6
489 if (last->inp_vflag & INP_IPV6) {
490 bzero(&udp_in6, sizeof(udp_in6));
491 udp_in6.sin6_len = sizeof(udp_in6);
492 udp_in6.sin6_family = AF_INET6;
493 in6_sin_2_v4mapsin6(udp_in, &udp_in6);
494 append_sa = (struct sockaddr *)&udp_in6;
495 } else
496#endif
497 append_sa = (struct sockaddr *)udp_in;
498 m_adj(n, off);
499
500 so = last->inp_socket;
501 SOCKBUF_LOCK(&so->so_rcv);
502 if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) == 0) {
503 m_freem(n);
504 if (opts)
505 m_freem(opts);
506 udpstat.udps_fullsock++;
507 SOCKBUF_UNLOCK(&so->so_rcv);
508 } else
509 sorwakeup_locked(so);
510}
511
512/*
513 * Notify a udp user of an asynchronous error;
514 * just wake up so that he can collect error status.
515 */
516struct inpcb *
517udp_notify(inp, errno)
518 register struct inpcb *inp;
519 int errno;
520{
521 inp->inp_socket->so_error = errno;
522 sorwakeup(inp->inp_socket);
523 sowwakeup(inp->inp_socket);
524 return inp;
525}
526
527void
528udp_ctlinput(cmd, sa, vip)
529 int cmd;
530 struct sockaddr *sa;
531 void *vip;
532{
533 struct ip *ip = vip;
534 struct udphdr *uh;
535 struct inpcb *(*notify)(struct inpcb *, int) = udp_notify;
536 struct in_addr faddr;
537 struct inpcb *inp;
538
539 faddr = ((struct sockaddr_in *)sa)->sin_addr;
540 if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
541 return;
542
543 /*
544 * Redirects don't need to be handled up here.
545 */
546 if (PRC_IS_REDIRECT(cmd))
547 return;
548 /*
549 * Hostdead is ugly because it goes linearly through all PCBs.
550 * XXX: We never get this from ICMP, otherwise it makes an
551 * excellent DoS attack on machines with many connections.
552 */
553 if (cmd == PRC_HOSTDEAD)
554 ip = 0;
555 else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
556 return;
557 if (ip) {
558 uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
559 INP_INFO_RLOCK(&udbinfo);
560 inp = in_pcblookup_hash(&udbinfo, faddr, uh->uh_dport,
561 ip->ip_src, uh->uh_sport, 0, NULL);
562 if (inp != NULL) {
563 INP_LOCK(inp);
564 if (inp->inp_socket != NULL) {
565 (*notify)(inp, inetctlerrmap[cmd]);
566 }
567 INP_UNLOCK(inp);
568 }
569 INP_INFO_RUNLOCK(&udbinfo);
570 } else
571 in_pcbnotifyall(&udbinfo, faddr, inetctlerrmap[cmd], notify);
572}
573
574static int
575udp_pcblist(SYSCTL_HANDLER_ARGS)
576{
577 int error, i, n;
578 struct inpcb *inp, **inp_list;
579 inp_gen_t gencnt;
580 struct xinpgen xig;
581
582 /*
583 * The process of preparing the TCB list is too time-consuming and
584 * resource-intensive to repeat twice on every request.
585 */
586 if (req->oldptr == 0) {
587 n = udbinfo.ipi_count;
588 req->oldidx = 2 * (sizeof xig)
589 + (n + n/8) * sizeof(struct xinpcb);
590 return 0;
591 }
592
593 if (req->newptr != 0)
594 return EPERM;
595
596 /*
597 * OK, now we're committed to doing something.
598 */
599 INP_INFO_RLOCK(&udbinfo);
600 gencnt = udbinfo.ipi_gencnt;
601 n = udbinfo.ipi_count;
602 INP_INFO_RUNLOCK(&udbinfo);
603
604 error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
605 + n * sizeof(struct xinpcb));
606 if (error != 0)
607 return (error);
608
609 xig.xig_len = sizeof xig;
610 xig.xig_count = n;
611 xig.xig_gen = gencnt;
612 xig.xig_sogen = so_gencnt;
613 error = SYSCTL_OUT(req, &xig, sizeof xig);
614 if (error)
615 return error;
616
617 inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
618 if (inp_list == 0)
619 return ENOMEM;
620
621 INP_INFO_RLOCK(&udbinfo);
622 for (inp = LIST_FIRST(udbinfo.listhead), i = 0; inp && i < n;
623 inp = LIST_NEXT(inp, inp_list)) {
624 INP_LOCK(inp);
625 if (inp->inp_gencnt <= gencnt &&
626 cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0)
627 inp_list[i++] = inp;
628 INP_UNLOCK(inp);
629 }
630 INP_INFO_RUNLOCK(&udbinfo);
631 n = i;
632
633 error = 0;
634 for (i = 0; i < n; i++) {
635 inp = inp_list[i];
636 if (inp->inp_gencnt <= gencnt) {
637 struct xinpcb xi;
638 bzero(&xi, sizeof(xi));
639 xi.xi_len = sizeof xi;
640 /* XXX should avoid extra copy */
641 bcopy(inp, &xi.xi_inp, sizeof *inp);
642 if (inp->inp_socket)
643 sotoxsocket(inp->inp_socket, &xi.xi_socket);
644 xi.xi_inp.inp_gencnt = inp->inp_gencnt;
645 error = SYSCTL_OUT(req, &xi, sizeof xi);
646 }
647 }
648 if (!error) {
649 /*
650 * Give the user an updated idea of our state.
651 * If the generation differs from what we told
652 * her before, she knows that something happened
653 * while we were processing this request, and it
654 * might be necessary to retry.
655 */
656 INP_INFO_RLOCK(&udbinfo);
657 xig.xig_gen = udbinfo.ipi_gencnt;
658 xig.xig_sogen = so_gencnt;
659 xig.xig_count = udbinfo.ipi_count;
660 INP_INFO_RUNLOCK(&udbinfo);
661 error = SYSCTL_OUT(req, &xig, sizeof xig);
662 }
663 free(inp_list, M_TEMP);
664 return error;
665}
666
667SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
668 udp_pcblist, "S,xinpcb", "List of active UDP sockets");
669
670static int
671udp_getcred(SYSCTL_HANDLER_ARGS)
672{
673 struct xucred xuc;
674 struct sockaddr_in addrs[2];
675 struct inpcb *inp;
676 int error;
677
678 error = suser_cred(req->td->td_ucred, SUSER_ALLOWJAIL);
679 if (error)
680 return (error);
681 error = SYSCTL_IN(req, addrs, sizeof(addrs));
682 if (error)
683 return (error);
684 INP_INFO_RLOCK(&udbinfo);
685 inp = in_pcblookup_hash(&udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
686 addrs[0].sin_addr, addrs[0].sin_port, 1, NULL);
687 if (inp == NULL || inp->inp_socket == NULL) {
688 error = ENOENT;
689 goto out;
690 }
691 error = cr_canseesocket(req->td->td_ucred, inp->inp_socket);
692 if (error)
693 goto out;
694 cru2x(inp->inp_socket->so_cred, &xuc);
695out:
696 INP_INFO_RUNLOCK(&udbinfo);
697 if (error == 0)
698 error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
699 return (error);
700}
701
702SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred,
703 CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
704 udp_getcred, "S,xucred", "Get the xucred of a UDP connection");
705
/*
 * udp_output: build and transmit one UDP datagram on behalf of inp.
 *
 *	inp	sending pcb.
 *	m	payload; its packet-header length is the datagram data length.
 *	addr	destination for an unconnected send, or NULL to use the
 *		pcb's connected foreign address.
 *	control	optional ancillary data; only IP_SENDSRCADDR at level
 *		IPPROTO_IP is accepted.
 *	td	calling thread (its credentials are used for bind/connect
 *		checks and jail handling).
 *
 * Both m and control are consumed on every path.  Returns 0 or an errno
 * value: EMSGSIZE (datagram too large), EINVAL (malformed control data, or
 * source address requested on an unbound socket), ENOPROTOOPT (unknown
 * control message), EISCONN (addr given on a connected socket), ENOTCONN
 * (no addr and not connected), EAGAIN (hash insertion of a new port
 * failed), ENOBUFS (no room for headers), or whatever the bind/connect
 * setup routines or ip_output() return.
 */
static int
udp_output(inp, m, addr, control, td)
	register struct inpcb *inp;
	struct mbuf *m;
	struct sockaddr *addr;
	struct mbuf *control;
	struct thread *td;
{
	register struct udpiphdr *ui;
	register int len = m->m_pkthdr.len;
	struct in_addr faddr, laddr;
	struct cmsghdr *cm;
	struct sockaddr_in *sin, src;
	int error = 0;
	int ipflags;
	u_short fport, lport;
	int unlock_udbinfo;

	/*
	 * udp_output() may need to temporarily bind or connect the current
	 * inpcb.  As such, we don't know up front what inpcb locks we will
	 * need.  Do any work to decide what is needed up front before
	 * acquiring locks.
	 */
	if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
		if (control)
			m_freem(control);
		m_freem(m);
		return EMSGSIZE;
	}

	/* INADDR_ANY here means "no source address was requested". */
	src.sin_addr.s_addr = INADDR_ANY;
	if (control != NULL) {
		/*
		 * XXX: Currently, we assume all the optional information
		 * is stored in a single mbuf.
		 */
		if (control->m_next) {
			m_freem(control);
			m_freem(m);
			return EINVAL;
		}
		/*
		 * Walk the control messages in place: each iteration
		 * advances the mbuf's data pointer past the message just
		 * examined.
		 */
		for (; control->m_len > 0;
		    control->m_data += CMSG_ALIGN(cm->cmsg_len),
		    control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
			cm = mtod(control, struct cmsghdr *);
			if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0 ||
			    cm->cmsg_len > control->m_len) {
				error = EINVAL;
				break;
			}
			/* Messages for other levels are skipped, not rejected. */
			if (cm->cmsg_level != IPPROTO_IP)
				continue;

			switch (cm->cmsg_type) {
			case IP_SENDSRCADDR:
				if (cm->cmsg_len !=
				    CMSG_LEN(sizeof(struct in_addr))) {
					error = EINVAL;
					break;
				}
				bzero(&src, sizeof(src));
				src.sin_family = AF_INET;
				src.sin_len = sizeof(src);
				src.sin_port = inp->inp_lport;
				src.sin_addr = *(struct in_addr *)CMSG_DATA(cm);
				break;
			default:
				error = ENOPROTOOPT;
				break;
			}
			if (error)
				break;
		}
		m_freem(control);
	}
	if (error) {
		m_freem(m);
		return error;
	}

	/*
	 * The pcbinfo write lock is taken only when a source address was
	 * requested or a destination was supplied, i.e. when the bind or
	 * connect setup code below will run.
	 */
	if (src.sin_addr.s_addr != INADDR_ANY ||
	    addr != NULL) {
		INP_INFO_WLOCK(&udbinfo);
		unlock_udbinfo = 1;
	} else
		unlock_udbinfo = 0;
	INP_LOCK(inp);

#ifdef MAC
	mac_create_mbuf_from_inpcb(inp, m);
#endif

	laddr = inp->inp_laddr;
	lport = inp->inp_lport;
	if (src.sin_addr.s_addr != INADDR_ANY) {
		/* A per-packet source address requires a bound local port. */
		if (lport == 0) {
			error = EINVAL;
			goto release;
		}
		error = in_pcbbind_setup(inp, (struct sockaddr *)&src,
		    &laddr.s_addr, &lport, td->td_ucred);
		if (error)
			goto release;
	}

	if (addr) {
		sin = (struct sockaddr_in *)addr;
		if (jailed(td->td_ucred))
			prison_remote_ip(td->td_ucred, 0, &sin->sin_addr.s_addr);
		if (inp->inp_faddr.s_addr != INADDR_ANY) {
			error = EISCONN;
			goto release;
		}
		/*
		 * Results are written to the local laddr/lport/faddr/fport
		 * variables rather than to the pcb.
		 */
		error = in_pcbconnect_setup(inp, addr, &laddr.s_addr, &lport,
		    &faddr.s_addr, &fport, NULL, td->td_ucred);
		if (error)
			goto release;

		/* Commit the local port if newly assigned. */
		if (inp->inp_laddr.s_addr == INADDR_ANY &&
		    inp->inp_lport == 0) {
			/*
			 * Remember addr if jailed, to prevent rebinding.
			 */
			if (jailed(td->td_ucred))
				inp->inp_laddr = laddr;
			inp->inp_lport = lport;
			if (in_pcbinshash(inp) != 0) {
				inp->inp_lport = 0;
				error = EAGAIN;
				goto release;
			}
			inp->inp_flags |= INP_ANONPORT;
		}
	} else {
		faddr = inp->inp_faddr;
		fport = inp->inp_fport;
		if (faddr.s_addr == INADDR_ANY) {
			error = ENOTCONN;
			goto release;
		}
	}

	/*
	 * Calculate data length and get a mbuf for UDP, IP, and possible
	 * link-layer headers.  Immediately slide the data pointer back
	 * forward since we won't use that space at this layer.
	 */
	M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_DONTWAIT);
	if (m == NULL) {
		error = ENOBUFS;
		goto release;
	}
	m->m_data += max_linkhdr;
	m->m_len -= max_linkhdr;
	m->m_pkthdr.len -= max_linkhdr;

	/*
	 * Fill in mbuf with extended UDP header
	 * and addresses and length put into network format.
	 */
	ui = mtod(m, struct udpiphdr *);
	bzero(ui->ui_x1, sizeof(ui->ui_x1));	/* XXX still needed? */
	ui->ui_pr = IPPROTO_UDP;
	ui->ui_src = laddr;
	ui->ui_dst = faddr;
	ui->ui_sport = lport;
	ui->ui_dport = fport;
	ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr));

	/*
	 * Set the Don't Fragment bit in the IP header.
	 */
	if (inp->inp_flags & INP_DONTFRAG) {
		struct ip *ip;
		ip = (struct ip *)&ui->ui_i;
		ip->ip_off |= IP_DF;
	}

	/* Translate socket/pcb options into ip_output() flags. */
	ipflags = 0;
	if (inp->inp_socket->so_options & SO_DONTROUTE)
		ipflags |= IP_ROUTETOIF;
	if (inp->inp_socket->so_options & SO_BROADCAST)
		ipflags |= IP_ALLOWBROADCAST;
	if (inp->inp_vflag & INP_ONESBCAST)
		ipflags |= IP_SENDONES;

	/*
	 * Set up checksum and output datagram.
	 */
	if (udpcksum) {
		/*
		 * With INP_ONESBCAST the pseudo-header is computed against
		 * INADDR_BROADCAST rather than the address stored in ui_dst.
		 */
		if (inp->inp_vflag & INP_ONESBCAST)
			faddr.s_addr = INADDR_BROADCAST;
		ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr,
		    htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP));
		m->m_pkthdr.csum_flags = CSUM_UDP;
		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
	} else {
		ui->ui_sum = 0;
	}
	((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len;
	((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl;	/* XXX */
	((struct ip *)ui)->ip_tos = inp->inp_ip_tos;	/* XXX */
	udpstat.udps_opackets++;

	/*
	 * The pcbinfo lock is dropped before ip_output(); the inpcb lock
	 * stays held across the call.
	 */
	if (unlock_udbinfo)
		INP_INFO_WUNLOCK(&udbinfo);
	error = ip_output(m, inp->inp_options, NULL, ipflags,
	    inp->inp_moptions, inp);
	INP_UNLOCK(inp);
	return (error);

release:
	/* Error exit: drop whichever locks are held and free the data. */
	INP_UNLOCK(inp);
	if (unlock_udbinfo)
		INP_INFO_WUNLOCK(&udbinfo);
	m_freem(m);
	return (error);
}
926
/*
 * Send-space value passed to soreserve() in udp_attach(); exported as the
 * "maxdgram" sysctl.
 */
u_long	udp_sendspace = 9216;		/* really max datagram size */
SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW,
    &udp_sendspace, 0, "Maximum outgoing UDP datagram size");

/*
 * Receive-space value passed to soreserve() in udp_attach(): room for
 * 40 1K datagrams, each with a source sockaddr (sized for IPv6 when INET6
 * is configured).
 */
u_long	udp_recvspace = 40 * (1024 +
#ifdef INET6
				      sizeof(struct sockaddr_in6)
#else
				      sizeof(struct sockaddr_in)
#endif
				      );
SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
    &udp_recvspace, 0, "Maximum space for incoming UDP datagrams");
941
942static void
943udp_abort(struct socket *so)
944{
945 struct inpcb *inp;
946
947 inp = sotoinpcb(so);
948 KASSERT(inp != NULL, ("udp_abort: inp == NULL"));
949 INP_INFO_WLOCK(&udbinfo);
950 INP_LOCK(inp);
951 soisdisconnected(so);
952 in_pcbdetach(inp);
953 in_pcbfree(inp);
954 INP_INFO_WUNLOCK(&udbinfo);
955}
956
957static int
958udp_attach(struct socket *so, int proto, struct thread *td)
959{
960 struct inpcb *inp;
961 int error;
962
963 inp = sotoinpcb(so);
964 KASSERT(inp == NULL, ("udp_attach: inp != NULL"));
965 INP_INFO_WLOCK(&udbinfo);
966 error = soreserve(so, udp_sendspace, udp_recvspace);
967 if (error) {
968 INP_INFO_WUNLOCK(&udbinfo);
969 return error;
970 }
971 error = in_pcballoc(so, &udbinfo, "udpinp");
972 if (error) {
973 INP_INFO_WUNLOCK(&udbinfo);
974 return error;
975 }
976
977 inp = (struct inpcb *)so->so_pcb;
978 INP_LOCK(inp);
979 INP_INFO_WUNLOCK(&udbinfo);
980 inp->inp_vflag |= INP_IPV4;
981 inp->inp_ip_ttl = ip_defttl;
982 INP_UNLOCK(inp);
983 return 0;
984}
985
986static int
987udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
988{
989 struct inpcb *inp;
990 int error;
991
992 inp = sotoinpcb(so);
993 KASSERT(inp != NULL, ("udp_bind: inp == NULL"));
994 INP_INFO_WLOCK(&udbinfo);
995 INP_LOCK(inp);
996 error = in_pcbbind(inp, nam, td->td_ucred);
997 INP_UNLOCK(inp);
998 INP_INFO_WUNLOCK(&udbinfo);
999 return error;
1000}
1001
1002static int
1003udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
1004{
1005 struct inpcb *inp;
1006 int error;
1007 struct sockaddr_in *sin;
1008
1009 inp = sotoinpcb(so);
1010 KASSERT(inp != NULL, ("udp_connect: inp == NULL"));
1011 INP_INFO_WLOCK(&udbinfo);
1012 INP_LOCK(inp);
1013 if (inp->inp_faddr.s_addr != INADDR_ANY) {
1014 INP_UNLOCK(inp);
1015 INP_INFO_WUNLOCK(&udbinfo);
1016 return EISCONN;
1017 }
1018 sin = (struct sockaddr_in *)nam;
1019 if (jailed(td->td_ucred))
1020 prison_remote_ip(td->td_ucred, 0, &sin->sin_addr.s_addr);
1021 error = in_pcbconnect(inp, nam, td->td_ucred);
1022 if (error == 0)
1023 soisconnected(so);
1024 INP_UNLOCK(inp);
1025 INP_INFO_WUNLOCK(&udbinfo);
1026 return error;
1027}
1028
1029static void
1030udp_detach(struct socket *so)
1031{
1032 struct inpcb *inp;
1033
1034 inp = sotoinpcb(so);
1035 KASSERT(inp != NULL, ("udp_detach: inp == NULL"));
1036 INP_INFO_WLOCK(&udbinfo);
1037 INP_LOCK(inp);
1038 in_pcbdetach(inp);
1039 in_pcbfree(inp);
1040 INP_INFO_WUNLOCK(&udbinfo);
1041}
1042
1043static int
1044udp_disconnect(struct socket *so)
1045{
1046 struct inpcb *inp;
1047
1048 inp = sotoinpcb(so);
1049 KASSERT(inp != NULL, ("udp_disconnect: inp == NULL"));
1050 INP_INFO_WLOCK(&udbinfo);
1051 INP_LOCK(inp);
1052 if (inp->inp_faddr.s_addr == INADDR_ANY) {
1053 INP_INFO_WUNLOCK(&udbinfo);
1054 INP_UNLOCK(inp);
1055 return ENOTCONN;
1056 }
1057
1058 in_pcbdisconnect(inp);
1059 inp->inp_laddr.s_addr = INADDR_ANY;
1060 INP_UNLOCK(inp);
1061 INP_INFO_WUNLOCK(&udbinfo);
1062 so->so_state &= ~SS_ISCONNECTED; /* XXX */
1063 return 0;
1064}
1065
/*
 * pru_send handler: thin wrapper that resolves the socket's pcb and hands
 * the data, optional destination and optional control mbuf to udp_output().
 * The flags argument is not used.  Returns udp_output()'s result.
 */
static int
udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
	    struct mbuf *control, struct thread *td)
{
	struct inpcb *inp;

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("udp_send: inp == NULL"));
	return udp_output(inp, m, addr, control, td);
}
1076
1077int
1078udp_shutdown(struct socket *so)
1079{
1080 struct inpcb *inp;
1081
1082 inp = sotoinpcb(so);
1083 KASSERT(inp != NULL, ("udp_shutdown: inp == NULL"));
1084 INP_LOCK(inp);
1085 socantsendmore(so);
1086 INP_UNLOCK(inp);
1087 return 0;
1088}
1089
/*
 * pru_sockaddr handler.
 *
 * This is the wrapper function for in_setsockaddr.  We just pass down
 * the pcbinfo for in_setsockaddr to lock.  We don't want to do the locking
 * here because in_setsockaddr will call malloc and might block.
 *
 * Returns whatever in_setsockaddr() returns; the address is delivered
 * through *nam.
 */
static int
udp_sockaddr(struct socket *so, struct sockaddr **nam)
{
	return (in_setsockaddr(so, nam, &udbinfo));
}
1100
/*
 * pru_peeraddr handler.
 *
 * This is the wrapper function for in_setpeeraddr.  We just pass down
 * the pcbinfo for in_setpeeraddr to lock.
 *
 * Returns whatever in_setpeeraddr() returns; the address is delivered
 * through *nam.
 */
static int
udp_peeraddr(struct socket *so, struct sockaddr **nam)
{
	return (in_setpeeraddr(so, nam, &udbinfo));
}
1110
/*
 * User-request dispatch table for UDP sockets.  Most entries point at the
 * handlers defined in this file; pru_control and pru_sosetlabel use the
 * shared in_control() and in_pcbsosetlabel() routines.  Members not named
 * here are left to the initializer's defaults.
 */
struct pr_usrreqs udp_usrreqs = {
	.pru_abort =		udp_abort,
	.pru_attach =		udp_attach,
	.pru_bind =		udp_bind,
	.pru_connect =		udp_connect,
	.pru_control =		in_control,
	.pru_detach =		udp_detach,
	.pru_disconnect =	udp_disconnect,
	.pru_peeraddr =		udp_peeraddr,
	.pru_send =		udp_send,
	.pru_shutdown =		udp_shutdown,
	.pru_sockaddr =		udp_sockaddr,
	.pru_sosetlabel =	in_pcbsosetlabel
};