Deleted Added
sdiff udiff text old ( 83366 ) new ( 83934 )
full compact
1/*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
34 * $FreeBSD: head/sys/netinet/ip_output.c 83366 2001-09-12 08:38:13Z julian $
35 */
36
37#define _IP_VHL
38
39#include "opt_ipfw.h"
40#include "opt_ipdn.h"
41#include "opt_ipdivert.h"
42#include "opt_ipfilter.h"
43#include "opt_ipsec.h"
44#include "opt_pfil_hooks.h"
45#include "opt_random_ip_id.h"
46
47#include <sys/param.h>
48#include <sys/systm.h>
49#include <sys/kernel.h>
50#include <sys/malloc.h>
51#include <sys/mbuf.h>
52#include <sys/protosw.h>
53#include <sys/socket.h>
54#include <sys/socketvar.h>
55
56#include <net/if.h>
57#include <net/route.h>
58
59#include <netinet/in.h>
60#include <netinet/in_systm.h>
61#include <netinet/ip.h>
62#include <netinet/in_pcb.h>
63#include <netinet/in_var.h>
64#include <netinet/ip_var.h>
65
66#include "faith.h"
67
68#include <machine/in_cksum.h>
69
70static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
71
72#ifdef IPSEC
73#include <netinet6/ipsec.h>
74#include <netkey/key.h>
75#ifdef IPSEC_DEBUG
76#include <netkey/key_debug.h>
77#else
78#define KEYDEBUG(lev,arg)
79#endif
80#endif /*IPSEC*/
81
82#include <netinet/ip_fw.h>
83
84#ifdef DUMMYNET
85#include <netinet/ip_dummynet.h>
86#endif
87
#ifdef IPFIREWALL_FORWARD_DEBUG
/*
 * Debug helper: print the IPv4 address stored in the s_addr member of `a'
 * (converted with ntohl()) in dotted-quad form, without a trailing newline.
 *
 * The expansion is a single expression with no trailing semicolon, so
 * "print_ip(x);" is exactly one statement and stays safe inside unbraced
 * if/else bodies.  The argument is parenthesized so that any expression
 * yielding the address structure may be passed.  Note that `a' is
 * evaluated four times; do not pass an expression with side effects.
 */
#define print_ip(a)	printf("%lu.%lu.%lu.%lu",			\
	(unsigned long)((ntohl((a).s_addr) >> 24) & 0xFF),		\
	(unsigned long)((ntohl((a).s_addr) >> 16) & 0xFF),		\
	(unsigned long)((ntohl((a).s_addr) >> 8) & 0xFF),		\
	(unsigned long)(ntohl((a).s_addr) & 0xFF))
#endif
94
 /*
  * Running IP identification counter.  When RANDOM_IP_ID is not defined,
  * ip_output() stamps locally originated packets with htons(ip_id++).
  */
95u_short ip_id;
96
 /* Forward declarations for the file-local helpers defined in this file. */
97static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
98static struct ifnet *ip_multicast_if __P((struct in_addr *, int *));
99static void ip_mloopback
100 __P((struct ifnet *, struct mbuf *, struct sockaddr_in *, int));
101static int ip_getmoptions
102 __P((struct sockopt *, struct ip_moptions *));
103static int ip_pcbopts __P((int, struct mbuf **, struct mbuf *));
104static int ip_setmoptions
105 __P((struct sockopt *, struct ip_moptions **));
106
 /* Not static: copies the options that must be replicated into fragments. */
107int ip_optcopy __P((struct ip *, struct ip *));
108
109
 /* Protocol switch table; indexed via ip_protox[] to reach the pfil hooks. */
110extern struct protosw inetsw[];
111
112/*
113 * IP output. The packet in mbuf chain m contains a skeletal IP
114 * header (with len, off, ttl, proto, tos, src, dst).
115 * The mbuf chain containing the packet will be freed.
116 * The mbuf opt, if present, will not be freed.
 *
 * Arguments:
 *   m0    - packet to send.  ip_len and ip_off are handled in host byte
 *           order throughout and only converted (HTONS) right before the
 *           packet is handed to the interface, IPsec, divert or ip_input.
 *   opt   - optional mbuf holding IP options; when non-NULL they are
 *           inserted with ip_insertoptions().
 *   ro    - route to use/cache; dereferenced unconditionally (a NULL
 *           route panics when DIAGNOSTIC is defined).
 *   flags - IP_FORWARDING / IP_RAWOUTPUT: header is taken as supplied
 *           (no ip_vhl/ip_id fill-in); IP_ROUTETOIF: skip the routing
 *           lookup and send with ttl 1 on the interface owning the
 *           destination; IP_ALLOWBROADCAST: permit broadcast destinations.
 *   imo   - multicast options (interface, ttl, loopback, vif) or NULL.
 *
 * Returns 0 or an errno value set along the way (ENETUNREACH,
 * EHOSTUNREACH, ENOBUFS, EADDRNOTAVAIL, EACCES, EMSGSIZE) or whatever
 * the interface's if_output routine / dummynet_io() returned.
117 */
118int
119ip_output(m0, opt, ro, flags, imo)
120 struct mbuf *m0;
121 struct mbuf *opt;
122 struct route *ro;
123 int flags;
124 struct ip_moptions *imo;
125{
126 struct ip *ip, *mhip;
127 struct ifnet *ifp;
128 struct mbuf *m = m0;
129 int hlen = sizeof (struct ip);
 /*
  * len: header length reported by ip_insertoptions(), later the payload
  * size of one fragment.  off: firewall verdict, later the running
  * offset while fragmenting.
  */
130 int len, off, error = 0;
131 struct sockaddr_in *dst;
132 struct in_ifaddr *ia;
133 int isbroadcast, sw_csum;
134#ifdef IPSEC
135 struct route iproute;
136 struct socket *so = NULL;
137 struct secpolicy *sp = NULL;
138#endif
139 u_int16_t divert_cookie; /* firewall cookie */
140#ifdef PFIL_HOOKS
141 struct packet_filter_hook *pfh;
142 struct mbuf *m1;
143 int rv;
144#endif /* PFIL_HOOKS */
145#ifdef IPFIREWALL_FORWARD
146 int fwd_rewrite_src = 0;
147#endif
148 struct ip_fw_chain *rule = NULL;
149
150#ifdef IPDIVERT
151 /* Get and reset firewall cookie */
152 divert_cookie = ip_divert_cookie;
153 ip_divert_cookie = 0;
154#else
155 divert_cookie = 0;
156#endif
157
158#if defined(IPFIREWALL) && defined(DUMMYNET)
159 /*
160 * dummynet packets are prepended with a vestigial mbuf with
161 * m_type = MT_DUMMYNET and m_data pointing to the matching
162 * rule.
163 */
164 if (m->m_type == MT_DUMMYNET) {
165 /*
166 * the packet was already tagged, so part of the
167 * processing was already done, and we need to go down.
168 * Get parameters from the header.
169 */
170 rule = (struct ip_fw_chain *)(m->m_data) ;
171 opt = NULL ;
172 ro = & ( ((struct dn_pkt *)m)->ro ) ;
173 imo = NULL ;
174 dst = ((struct dn_pkt *)m)->dn_dst ;
175 ifp = ((struct dn_pkt *)m)->ifp ;
176 flags = ((struct dn_pkt *)m)->flags ;
177
 /* Step past the vestigial header mbuf to the real packet. */
178 m0 = m = m->m_next ;
179#ifdef IPSEC
180 so = ipsec_getsocket(m);
181 (void)ipsec_setsocket(m, NULL);
182#endif
183 ip = mtod(m, struct ip *);
184 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
185 ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa;
186 goto sendit;
187 } else
188 rule = NULL ;
189#endif
190#ifdef IPSEC
191 so = ipsec_getsocket(m);
192 (void)ipsec_setsocket(m, NULL);
193#endif
194
195#ifdef DIAGNOSTIC
196 if ((m->m_flags & M_PKTHDR) == 0)
197 panic("ip_output no HDR");
198 if (!ro)
199 panic("ip_output no route, proto = %d",
200 mtod(m, struct ip *)->ip_p);
201#endif
 /*
  * NOTE(review): hlen is taken from len unconditionally, so this relies
  * on ip_insertoptions() storing *phlen on every return path -- confirm.
  */
202 if (opt) {
203 m = ip_insertoptions(m, opt, &len);
204 hlen = len;
205 }
206 ip = mtod(m, struct ip *);
207 /*
208 * Fill in IP header.
209 */
210 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
211 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
212 ip->ip_off &= IP_DF;
213#ifdef RANDOM_IP_ID
214 ip->ip_id = ip_randomid();
215#else
216 ip->ip_id = htons(ip_id++);
217#endif
218 ipstat.ips_localout++;
219 } else {
 /* Forwarded/raw packets carry a complete header: trust its length. */
220 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
221 }
222
223 dst = (struct sockaddr_in *)&ro->ro_dst;
224 /*
225 * If there is a cached route,
226 * check that it is to the same destination
227 * and is still up. If not, free it and try again.
228 */
229 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
230 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
231 RTFREE(ro->ro_rt);
232 ro->ro_rt = (struct rtentry *)0;
233 }
234 if (ro->ro_rt == 0) {
235 dst->sin_family = AF_INET;
236 dst->sin_len = sizeof(*dst);
237 dst->sin_addr = ip->ip_dst;
238 }
239 /*
240 * If routing to interface only,
241 * short circuit routing lookup.
242 */
243#define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
244#define sintosa(sin) ((struct sockaddr *)(sin))
245 if (flags & IP_ROUTETOIF) {
246 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
247 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
248 ipstat.ips_noroute++;
249 error = ENETUNREACH;
250 goto bad;
251 }
252 ifp = ia->ia_ifp;
253 ip->ip_ttl = 1;
254 isbroadcast = in_broadcast(dst->sin_addr, ifp);
255 } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
256 imo != NULL && imo->imo_multicast_ifp != NULL) {
257 /*
258 * Bypass the normal routing lookup for multicast
259 * packets if the interface is specified.
260 */
261 ifp = imo->imo_multicast_ifp;
262 IFP_TO_IA(ifp, ia);
263 isbroadcast = 0; /* fool gcc */
264 } else {
265 /*
266 * If this is the case, we probably don't want to allocate
267 * a protocol-cloned route since we didn't get one from the
268 * ULP. This lets TCP do its thing, while not burdening
269 * forwarding or ICMP with the overhead of cloning a route.
270 * Of course, we still want to do any cloning requested by
271 * the link layer, as this is probably required in all cases
272 * for correct operation (as it is for ARP).
273 */
274 if (ro->ro_rt == 0)
275 rtalloc_ign(ro, RTF_PRCLONING);
276 if (ro->ro_rt == 0) {
277 ipstat.ips_noroute++;
278 error = EHOSTUNREACH;
279 goto bad;
280 }
281 ia = ifatoia(ro->ro_rt->rt_ifa);
282 ifp = ro->ro_rt->rt_ifp;
283 ro->ro_rt->rt_use++;
 /* Via a gateway the link-level next hop is the gateway address. */
284 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
285 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
286 if (ro->ro_rt->rt_flags & RTF_HOST)
287 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
288 else
289 isbroadcast = in_broadcast(dst->sin_addr, ifp);
290 }
291 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
292 struct in_multi *inm;
293
294 m->m_flags |= M_MCAST;
295 /*
296 * IP destination address is multicast. Make sure "dst"
297 * still points to the address in "ro". (It may have been
298 * changed to point to a gateway address, above.)
299 */
300 dst = (struct sockaddr_in *)&ro->ro_dst;
301 /*
302 * See if the caller provided any multicast options
303 */
304 if (imo != NULL) {
305 ip->ip_ttl = imo->imo_multicast_ttl;
306 if (imo->imo_multicast_vif != -1)
307 ip->ip_src.s_addr =
308 ip_mcast_src(imo->imo_multicast_vif);
309 } else
310 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
311 /*
312 * Confirm that the outgoing interface supports multicast.
313 */
314 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
315 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
316 ipstat.ips_noroute++;
317 error = ENETUNREACH;
318 goto bad;
319 }
320 }
321 /*
322 * If source address not specified yet, use address
323 * of outgoing interface.
324 */
325 if (ip->ip_src.s_addr == INADDR_ANY) {
326 /* Interface may have no addresses. */
327 if (ia != NULL)
328 ip->ip_src = IA_SIN(ia)->sin_addr;
329 }
330
331 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
332 if (inm != NULL &&
333 (imo == NULL || imo->imo_multicast_loop)) {
334 /*
335 * If we belong to the destination multicast group
336 * on the outgoing interface, and the caller did not
337 * forbid loopback, loop back a copy.
338 */
339 ip_mloopback(ifp, m, dst, hlen);
340 }
341 else {
342 /*
343 * If we are acting as a multicast router, perform
344 * multicast forwarding as if the packet had just
345 * arrived on the interface to which we are about
346 * to send. The multicast forwarding function
347 * recursively calls this function, using the
348 * IP_FORWARDING flag to prevent infinite recursion.
349 *
350 * Multicasts that are looped back by ip_mloopback(),
351 * above, will be forwarded by the ip_input() routine,
352 * if necessary.
353 */
354 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
355 /*
356 * Check if rsvp daemon is running. If not, don't
357 * set ip_moptions. This ensures that the packet
358 * is multicast and not just sent down one link
359 * as prescribed by rsvpd.
360 */
361 if (!rsvp_on)
362 imo = NULL;
363 if (ip_mforward(ip, ifp, m, imo) != 0) {
364 m_freem(m);
365 goto done;
366 }
367 }
368 }
369
370 /*
371 * Multicasts with a time-to-live of zero may be looped-
372 * back, above, but must not be transmitted on a network.
373 * Also, multicasts addressed to the loopback interface
374 * are not sent -- the above call to ip_mloopback() will
375 * loop back a copy if this host actually belongs to the
376 * destination group on the loopback interface.
377 */
378 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
379 m_freem(m);
380 goto done;
381 }
382
 /* Multicast skips the unicast source/queue/broadcast checks below. */
383 goto sendit;
384 }
385#ifndef notdef
386 /*
387 * If source address not specified yet, use address
388 * of outgoing interface.
389 */
390 if (ip->ip_src.s_addr == INADDR_ANY) {
391 /* Interface may have no addresses. */
392 if (ia != NULL) {
393 ip->ip_src = IA_SIN(ia)->sin_addr;
394#ifdef IPFIREWALL_FORWARD
395 /* Keep note that we did this - if the firewall changes
396 * the next-hop, our interface may change, changing the
397 * default source IP. It's a shame so much effort happens
398 * twice. Oh well.
399 */
400 fwd_rewrite_src++;
401#endif /* IPFIREWALL_FORWARD */
402 }
403 }
404#endif /* notdef */
405 /*
406 * Verify that we have any chance at all of being able to queue
407 * the packet or packet fragments
408 */
409 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
410 ifp->if_snd.ifq_maxlen) {
411 error = ENOBUFS;
412 goto bad;
413 }
414
415 /*
416 * Look for broadcast address and
417 * verify user is allowed to send
418 * such a packet.
419 */
420 if (isbroadcast) {
421 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
422 error = EADDRNOTAVAIL;
423 goto bad;
424 }
425 if ((flags & IP_ALLOWBROADCAST) == 0) {
426 error = EACCES;
427 goto bad;
428 }
429 /* don't allow broadcast messages to be fragmented */
430 if ((u_short)ip->ip_len > ifp->if_mtu) {
431 error = EMSGSIZE;
432 goto bad;
433 }
434 m->m_flags |= M_BCAST;
435 } else {
436 m->m_flags &= ~M_BCAST;
437 }
438
 /*
  * Routing is settled at this point.  Also reached directly (goto) by
  * packets re-injected from dummynet and by multicast packets.
  */
439sendit:
440#ifdef IPSEC
441 /* get SP for this packet */
442 if (so == NULL)
443 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
444 else
445 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
446
447 if (sp == NULL) {
448 ipsecstat.out_inval++;
449 goto bad;
450 }
451
452 error = 0;
453
454 /* check policy */
455 switch (sp->policy) {
456 case IPSEC_POLICY_DISCARD:
457 /*
458 * This packet is just discarded.
459 */
460 ipsecstat.out_polvio++;
461 goto bad;
462
463 case IPSEC_POLICY_BYPASS:
464 case IPSEC_POLICY_NONE:
465 /* no need to do IPsec. */
466 goto skip_ipsec;
467
468 case IPSEC_POLICY_IPSEC:
469 if (sp->req == NULL) {
470 /* acquire a policy */
471 error = key_spdacquire(sp);
472 goto bad;
473 }
474 break;
475
476 case IPSEC_POLICY_ENTRUST:
477 default:
478 printf("ip_output: Invalid policy found. %d\n", sp->policy);
 /*
  * No break or goto here: control leaves the switch and continues
  * with the IPsec processing block below.
  */
479 }
480 {
481 struct ipsec_output_state state;
482 bzero(&state, sizeof(state));
483 state.m = m;
 /*
  * With IP_ROUTETOIF the caller's route is not used; hand IPsec a
  * zeroed scratch route instead (released again at "done").
  */
484 if (flags & IP_ROUTETOIF) {
485 state.ro = &iproute;
486 bzero(&iproute, sizeof(iproute));
487 } else
488 state.ro = ro;
489 state.dst = (struct sockaddr *)dst;
490
491 ip->ip_sum = 0;
492
493 /*
494 * XXX
495 * delayed checksums are not currently compatible with IPsec
496 */
497 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
498 in_delayed_cksum(m);
499 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
500 }
501
 /* ipsec4_output() is given the header in network byte order. */
502 HTONS(ip->ip_len);
503 HTONS(ip->ip_off);
504
505 error = ipsec4_output(&state, sp, flags);
506
507 m = state.m;
508 if (flags & IP_ROUTETOIF) {
509 /*
510 * if we have tunnel mode SA, we may need to ignore
511 * IP_ROUTETOIF.
512 */
513 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
514 flags &= ~IP_ROUTETOIF;
515 ro = state.ro;
516 }
517 } else
518 ro = state.ro;
519 dst = (struct sockaddr_in *)state.dst;
520 if (error) {
521 /* mbuf is already reclaimed in ipsec4_output. */
522 m0 = NULL;
523 switch (error) {
524 case EHOSTUNREACH:
525 case ENETUNREACH:
526 case EMSGSIZE:
527 case ENOBUFS:
528 case ENOMEM:
529 break;
530 default:
531 printf("ip4_output (ipsec): error code %d\n", error);
532 /*fall through*/
533 case ENOENT:
534 /* don't show these error codes to the user */
535 error = 0;
536 break;
537 }
538 goto bad;
539 }
540 }
541
542 /* be sure to update variables that are affected by ipsec4_output() */
543 ip = mtod(m, struct ip *);
544#ifdef _IP_VHL
545 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
546#else
547 hlen = ip->ip_hl << 2;
548#endif
549 if (ro->ro_rt == NULL) {
550 if ((flags & IP_ROUTETOIF) == 0) {
551 printf("ip_output: "
552 "can't update route after IPsec processing\n");
553 error = EHOSTUNREACH; /*XXX*/
554 goto bad;
555 }
556 } else {
557 ia = ifatoia(ro->ro_rt->rt_ifa);
558 ifp = ro->ro_rt->rt_ifp;
559 }
560
561 /* make it flipped, again. */
562 NTOHS(ip->ip_len);
563 NTOHS(ip->ip_off);
564skip_ipsec:
565#endif /*IPSEC*/
566
567 /*
568 * IpHack's section.
569 * - Xlate: translate packet's addr/port (NAT).
570 * - Firewall: deny/allow/etc.
571 * - Wrap: fake packet's addr/port <unimpl.>
572 * - Encapsulate: put it in another IP and send out. <unimp.>
573 */
574#ifdef PFIL_HOOKS
575 /*
576 * Run through list of hooks for output packets.
577 */
578 m1 = m;
579 pfh = pfil_hook_get(PFIL_OUT, &inetsw[ip_protox[IPPROTO_IP]].pr_pfh);
580 for (; pfh; pfh = TAILQ_NEXT(pfh, pfil_link))
581 if (pfh->pfil_func) {
582 rv = pfh->pfil_func(ip, hlen, ifp, 1, &m1);
583 if (rv) {
584 error = EHOSTUNREACH;
585 goto done;
586 }
 /* A hook may replace the mbuf, or hand back NULL to end output here. */
587 m = m1;
588 if (m == NULL)
589 goto done;
590 ip = mtod(m, struct ip *);
591 }
592#endif /* PFIL_HOOKS */
593
594 /*
595 * Check with the firewall...
596 */
597 if (fw_enable && ip_fw_chk_ptr) {
598 struct sockaddr_in *old = dst;
599
600 off = (*ip_fw_chk_ptr)(&ip,
601 hlen, ifp, &divert_cookie, &m, &rule, &dst);
602 /*
603 * On return we must do the following:
604 * m == NULL -> drop the pkt (old interface, deprecated)
605 * (off & 0x40000) -> drop the pkt (new interface)
606 * 1<=off<= 0xffff -> DIVERT
607 * (off & 0x10000) -> send to a DUMMYNET pipe
608 * (off & 0x20000) -> TEE the packet
609 * dst != old -> IPFIREWALL_FORWARD
610 * off==0, dst==old -> accept
611 * If some of the above modules is not compiled in, then
612 * we shouldn't have to check the corresponding condition
613 * (because the ipfw control socket should not accept
614 * unsupported rules), but better play safe and drop
615 * packets in case of doubt.
616 */
617 if (off & IP_FW_PORT_DENY_FLAG) { /* XXX new interface-denied */
618 if (m)
619 m_freem(m);
620 error = EACCES ;
621 goto done;
622 }
623 if (!m) { /* firewall said to reject */
 /* Rate-limit the warning: printed at most 10 times. */
624 static int __debug=10;
625 if (__debug >0) {
626 printf("firewall returns NULL, please update!\n");
627 __debug-- ;
628 }
629 error = EACCES;
630 goto done;
631 }
632 if (off == 0 && dst == old) /* common case */
633 goto pass ;
634#ifdef DUMMYNET
635 if ((off & IP_FW_PORT_DYNT_FLAG) != 0) {
636 /*
637 * pass the pkt to dummynet. Need to include
638 * pipe number, m, ifp, ro, dst because these are
639 * not recomputed in the next pass.
640 * All other parameters have been already used and
641 * so they are not needed anymore.
642 * XXX note: if the ifp or ro entry are deleted
643 * while a pkt is in dummynet, we are in trouble!
644 */
645 error = dummynet_io(off & 0xffff, DN_TO_IP_OUT, m,
646 ifp,ro,dst,rule, flags);
647 goto done;
648 }
649#endif
650#ifdef IPDIVERT
651 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
652 struct mbuf *clone = NULL;
653
654 /* Clone packet if we're doing a 'tee' */
655 if ((off & IP_FW_PORT_TEE_FLAG) != 0)
656 clone = m_dup(m, M_DONTWAIT);
657
658 /*
659 * XXX
660 * delayed checksums are not currently compatible
661 * with divert sockets.
662 */
663 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
664 in_delayed_cksum(m);
665 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
666 }
667
668 /* Restore packet header fields to original values */
669 HTONS(ip->ip_len);
670 HTONS(ip->ip_off);
671
672 /* Deliver packet to divert input routine */
673 ip_divert_cookie = divert_cookie;
674 divert_packet(m, 0, off & 0xffff);
675
676 /* If 'tee', continue with original packet */
677 if (clone != NULL) {
678 m = clone;
679 ip = mtod(m, struct ip *);
680 goto pass;
681 }
682 goto done;
683 }
684#endif
685
686#ifdef IPFIREWALL_FORWARD
687 /* Here we check dst to make sure it's directly reachable on the
688 * interface we previously thought it was.
689 * If it isn't (which may be likely in some situations) we have
690 * to re-route it (ie, find a route for the next-hop and the
691 * associated interface) and set them here. This is nested
692 * forwarding which in most cases is undesirable, except where
693 * such control is nigh impossible. So we do it here.
694 * And I'm babbling.
695 */
696 if (off == 0 && old != dst) {
697 struct in_ifaddr *ia;
698
699 /* It's changed... */
700 /* There must be a better way to do this next line... */
701 static struct route sro_fwd, *ro_fwd = &sro_fwd;
702#ifdef IPFIREWALL_FORWARD_DEBUG
703 printf("IPFIREWALL_FORWARD: New dst ip: ");
704 print_ip(dst->sin_addr);
705 printf("\n");
706#endif
707 /*
708 * We need to figure out if we have been forwarded
709 * to a local socket. If so then we should somehow
710 * "loop back" to ip_input, and get directed to the
711 * PCB as if we had received this packet. This is
712 * because it may be difficult to identify the packets
713 * you want to forward until they are being output
714 * and have selected an interface. (e.g. locally
715 * initiated packets) If we used the loopback interface,
716 * we would not be able to control what happens
717 * as the packet runs through ip_input() as
718 * it is done through a ISR.
719 */
720 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
721 /*
722 * If the addr to forward to is one
723 * of ours, we pretend to
724 * be the destination for this packet.
725 */
726 if (IA_SIN(ia)->sin_addr.s_addr ==
727 dst->sin_addr.s_addr)
728 break;
729 }
730 if (ia) {
731 /* tell ip_input "dont filter" */
732 ip_fw_fwd_addr = dst;
733 if (m->m_pkthdr.rcvif == NULL)
734 m->m_pkthdr.rcvif = ifunit("lo0");
 /*
  * The transport checksum was never computed; mark it as already
  * verified so the input path does not check it.
  * NOTE(review): csum_data is stored through m0 while the flags are
  * set on m -- confirm this is intended when m != m0.
  */
735 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
736 m->m_pkthdr.csum_flags |=
737 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
738 m0->m_pkthdr.csum_data = 0xffff;
739 }
740 m->m_pkthdr.csum_flags |=
741 CSUM_IP_CHECKED | CSUM_IP_VALID;
742 HTONS(ip->ip_len);
743 HTONS(ip->ip_off);
744 ip_input(m);
745 goto done;
746 }
747 /* Some of the logic for this was
748 * nicked from above.
749 *
750 * This rewrites the cached route in a local PCB.
751 * Is this what we want to do?
752 */
753 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
754
755 ro_fwd->ro_rt = 0;
756 rtalloc_ign(ro_fwd, RTF_PRCLONING);
757
758 if (ro_fwd->ro_rt == 0) {
759 ipstat.ips_noroute++;
760 error = EHOSTUNREACH;
761 goto bad;
762 }
763
764 ia = ifatoia(ro_fwd->ro_rt->rt_ifa);
765 ifp = ro_fwd->ro_rt->rt_ifp;
766 ro_fwd->ro_rt->rt_use++;
767 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
768 dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
769 if (ro_fwd->ro_rt->rt_flags & RTF_HOST)
770 isbroadcast =
771 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
772 else
773 isbroadcast = in_broadcast(dst->sin_addr, ifp);
 /* Swap the caller's cached route for the freshly looked-up one. */
774 RTFREE(ro->ro_rt);
775 ro->ro_rt = ro_fwd->ro_rt;
776 dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
777
778 /*
779 * If we added a default src ip earlier,
780 * which would have been gotten from the-then
781 * interface, do it again, from the new one.
782 */
783 if (fwd_rewrite_src)
784 ip->ip_src = IA_SIN(ia)->sin_addr;
785 goto pass ;
786 }
787#endif /* IPFIREWALL_FORWARD */
788 /*
789 * if we get here, none of the above matches, and
790 * we have to drop the pkt
791 */
792 m_freem(m);
793 error = EACCES; /* not sure this is the right error msg */
794 goto done;
795 }
796
797pass:
 /*
  * sw_csum collects the checksum requests in csum_flags that the
  * interface does not advertise in if_hwassist and that therefore have
  * to be done in software here; csum_flags is then reduced to the part
  * left for the interface.
  */
798 m->m_pkthdr.csum_flags |= CSUM_IP;
799 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
800 if (sw_csum & CSUM_DELAY_DATA) {
801 in_delayed_cksum(m);
802 sw_csum &= ~CSUM_DELAY_DATA;
803 }
804 m->m_pkthdr.csum_flags &= ifp->if_hwassist;
805
806 /*
807 * If small enough for interface, or the interface will take
808 * care of the fragmentation for us, can just send directly.
809 */
810 if ((u_short)ip->ip_len <= ifp->if_mtu ||
811 ifp->if_hwassist & CSUM_FRAGMENT) {
812 HTONS(ip->ip_len);
813 HTONS(ip->ip_off);
814 ip->ip_sum = 0;
815 if (sw_csum & CSUM_DELAY_IP) {
 /* IP_VHL_BORING: option-less header, so use the fixed-size checksum. */
816 if (ip->ip_vhl == IP_VHL_BORING) {
817 ip->ip_sum = in_cksum_hdr(ip);
818 } else {
819 ip->ip_sum = in_cksum(m, hlen);
820 }
821 }
822
823 /* Record statistics for this interface address. */
824 if (!(flags & IP_FORWARDING) && ia) {
825 ia->ia_ifa.if_opackets++;
826 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
827 }
828
829#ifdef IPSEC
830 /* clean ipsec history once it goes out of the node */
831 ipsec_delaux(m);
832#endif
833
834 error = (*ifp->if_output)(ifp, m,
835 (struct sockaddr *)dst, ro->ro_rt);
836 goto done;
837 }
838 /*
839 * Too large for interface; fragment if possible.
840 * Must be able to put at least 8 bytes per fragment.
841 */
842 if (ip->ip_off & IP_DF) {
843 error = EMSGSIZE;
844 /*
845 * This case can happen if the user changed the MTU
846 * of an interface after enabling IP on it. Because
847 * most netifs don't keep track of routes pointing to
848 * them, there is no way for one to update all its
849 * routes when the MTU is changed.
850 */
851 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
852 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
853 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
854 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
855 }
856 ipstat.ips_cantfrag++;
857 goto bad;
858 }
 /* Payload bytes per fragment: what fits after the header, rounded
  * down to a multiple of 8. */
859 len = (ifp->if_mtu - hlen) &~ 7;
860 if (len < 8) {
861 error = EMSGSIZE;
862 goto bad;
863 }
864
865 /*
866 * if the interface will not calculate checksums on
867 * fragmented packets, then do it here.
868 */
869 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
870 (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) {
871 in_delayed_cksum(m);
872 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
873 }
874
875 {
876 int mhlen, firstlen = len;
 /* Fragments are chained through m_nextpkt, starting at the original. */
877 struct mbuf **mnext = &m->m_nextpkt;
878 int nfrags = 1;
879
880 /*
881 * Loop through length of segment after first fragment,
882 * make new header and copy data of each part and link onto chain.
883 */
884 m0 = m;
885 mhlen = sizeof (struct ip);
886 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
887 MGETHDR(m, M_DONTWAIT, MT_HEADER);
888 if (m == 0) {
889 error = ENOBUFS;
890 ipstat.ips_odropped++;
891 goto sendorfree;
892 }
893 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
894 m->m_data += max_linkhdr;
895 mhip = mtod(m, struct ip *);
896 *mhip = *ip;
 /* Only the options ip_optcopy() selects are carried in later fragments. */
897 if (hlen > sizeof (struct ip)) {
898 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
899 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
900 }
901 m->m_len = mhlen;
902 mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
 /* Last fragment: shorten len; all others get the more-fragments bit. */
903 if (off + len >= (u_short)ip->ip_len)
904 len = (u_short)ip->ip_len - off;
905 else
906 mhip->ip_off |= IP_MF;
907 mhip->ip_len = htons((u_short)(len + mhlen));
908 m->m_next = m_copy(m0, off, len);
909 if (m->m_next == 0) {
910 (void) m_free(m);
911 error = ENOBUFS; /* ??? */
912 ipstat.ips_odropped++;
913 goto sendorfree;
914 }
915 m->m_pkthdr.len = mhlen + len;
916 m->m_pkthdr.rcvif = (struct ifnet *)0;
917 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
918 HTONS(mhip->ip_off);
919 mhip->ip_sum = 0;
920 if (sw_csum & CSUM_DELAY_IP) {
921 if (mhip->ip_vhl == IP_VHL_BORING) {
922 mhip->ip_sum = in_cksum_hdr(mhip);
923 } else {
924 mhip->ip_sum = in_cksum(m, mhlen);
925 }
926 }
927 *mnext = m;
928 mnext = &m->m_nextpkt;
929 nfrags++;
930 }
931 ipstat.ips_ofragments += nfrags;
932
933 /* set first/last markers for fragment chain */
934 m->m_flags |= M_LASTFRAG;
935 m0->m_flags |= M_FIRSTFRAG | M_FRAG;
936 m0->m_pkthdr.csum_data = nfrags;
937
938 /*
939 * Update first fragment by trimming what's been copied out
940 * and updating header, then send each fragment (in order).
941 */
942 m = m0;
943 m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
944 m->m_pkthdr.len = hlen + firstlen;
945 ip->ip_len = htons((u_short)m->m_pkthdr.len);
946 ip->ip_off |= IP_MF;
947 HTONS(ip->ip_off);
948 ip->ip_sum = 0;
949 if (sw_csum & CSUM_DELAY_IP) {
950 if (ip->ip_vhl == IP_VHL_BORING) {
951 ip->ip_sum = in_cksum_hdr(ip);
952 } else {
953 ip->ip_sum = in_cksum(m, hlen);
954 }
955 }
956sendorfree:
 /*
  * Walk the m_nextpkt list: transmit fragments while error is 0; once
  * an error is set (allocation failure above or a failed if_output)
  * the remaining fragments are freed instead of sent.
  */
957 for (m = m0; m; m = m0) {
958 m0 = m->m_nextpkt;
959 m->m_nextpkt = 0;
960#ifdef IPSEC
961 /* clean ipsec history once it goes out of the node */
962 ipsec_delaux(m);
963#endif
964 if (error == 0) {
965 /* Record statistics for this interface address. */
966 if (ia != NULL) {
967 ia->ia_ifa.if_opackets++;
968 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
969 }
970
971 error = (*ifp->if_output)(ifp, m,
972 (struct sockaddr *)dst, ro->ro_rt);
973 } else
974 m_freem(m);
975 }
976
977 if (error == 0)
978 ipstat.ips_fragmented++;
979 }
980done:
981#ifdef IPSEC
 /*
  * iproute is the local scratch route handed to IPsec in place of the
  * caller's route for IP_ROUTETOIF; drop the reference it may hold.
  */
982 if (ro == &iproute && ro->ro_rt) {
983 RTFREE(ro->ro_rt);
984 ro->ro_rt = NULL;
985 }
986 if (sp != NULL) {
987 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
988 printf("DP ip_output call free SP:%p\n", sp));
989 key_freesp(sp);
990 }
991#endif /* IPSEC */
992 return (error);
993bad:
 /*
  * Error exit: free the chain headed by m0, then take the common exit.
  * NOTE(review): m0 is not updated when ip_insertoptions() returns a
  * newly prepended mbuf in m -- confirm that mbuf cannot be leaked here.
  */
994 m_freem(m0);
995 goto done;
996}
997
998void
999in_delayed_cksum(struct mbuf *m)
1000{
1001 struct ip *ip;
1002 u_short csum, offset;
1003
1004 ip = mtod(m, struct ip *);
1005 offset = IP_VHL_HL(ip->ip_vhl) << 2 ;
1006 csum = in_cksum_skip(m, ip->ip_len, offset);
1007 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
1008 csum = 0xffff;
1009 offset += m->m_pkthdr.csum_data; /* checksum offset */
1010
1011 if (offset + sizeof(u_short) > m->m_len) {
1012 printf("delayed m_pullup, m->len: %d off: %d p: %d\n",
1013 m->m_len, offset, ip->ip_p);
1014 /*
1015 * XXX
1016 * this shouldn't happen, but if it does, the
1017 * correct behavior may be to insert the checksum
1018 * in the existing chain instead of rearranging it.
1019 */
1020 m = m_pullup(m, offset + sizeof(u_short));
1021 }
1022 *(u_short *)(m->m_data + offset) = csum;
1023}
1024
1025/*
1026 * Insert IP options into preformed packet.
1027 * Adjust IP destination as required for IP source routing,
1028 * as indicated by a non-zero in_addr at the start of the options.
1029 *
1030 * XXX This routine assumes that the packet has no options in place.
1031 */
1032static struct mbuf *
1033ip_insertoptions(m, opt, phlen)
1034 register struct mbuf *m;
1035 struct mbuf *opt;
1036 int *phlen;
1037{
1038 register struct ipoption *p = mtod(opt, struct ipoption *);
1039 struct mbuf *n;
1040 register struct ip *ip = mtod(m, struct ip *);
1041 unsigned optlen;
1042
1043 optlen = opt->m_len - sizeof(p->ipopt_dst);
1044 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
1045 return (m); /* XXX should fail */
1046 if (p->ipopt_dst.s_addr)
1047 ip->ip_dst = p->ipopt_dst;
1048 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
1049 MGETHDR(n, M_DONTWAIT, MT_HEADER);
1050 if (n == 0)
1051 return (m);
1052 n->m_pkthdr.rcvif = (struct ifnet *)0;
1053 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
1054 m->m_len -= sizeof(struct ip);
1055 m->m_data += sizeof(struct ip);
1056 n->m_next = m;
1057 m = n;
1058 m->m_len = optlen + sizeof(struct ip);
1059 m->m_data += max_linkhdr;
1060 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
1061 } else {
1062 m->m_data -= optlen;
1063 m->m_len += optlen;
1064 m->m_pkthdr.len += optlen;
1065 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
1066 }
1067 ip = mtod(m, struct ip *);
1068 bcopy(p->ipopt_list, ip + 1, optlen);
1069 *phlen = sizeof(struct ip) + optlen;
1070 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
1071 ip->ip_len += optlen;
1072 return (m);
1073}
1074
1075/*
1076 * Copy options from ip to jp,
1077 * omitting those not copied during fragmentation.
1078 */
1079int
1080ip_optcopy(ip, jp)
1081 struct ip *ip, *jp;
1082{
1083 register u_char *cp, *dp;
1084 int opt, optlen, cnt;
1085
1086 cp = (u_char *)(ip + 1);
1087 dp = (u_char *)(jp + 1);
1088 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
1089 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1090 opt = cp[0];
1091 if (opt == IPOPT_EOL)
1092 break;
1093 if (opt == IPOPT_NOP) {
1094 /* Preserve for IP mcast tunnel's LSRR alignment. */
1095 *dp++ = IPOPT_NOP;
1096 optlen = 1;
1097 continue;
1098 }
1099#ifdef DIAGNOSTIC
1100 if (cnt < IPOPT_OLEN + sizeof(*cp))
1101 panic("malformed IPv4 option passed to ip_optcopy");
1102#endif
1103 optlen = cp[IPOPT_OLEN];
1104#ifdef DIAGNOSTIC
1105 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1106 panic("malformed IPv4 option passed to ip_optcopy");
1107#endif
1108 /* bogus lengths should have been caught by ip_dooptions */
1109 if (optlen > cnt)
1110 optlen = cnt;
1111 if (IPOPT_COPIED(opt)) {
1112 bcopy(cp, dp, optlen);
1113 dp += optlen;
1114 }
1115 }
1116 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1117 *dp++ = IPOPT_EOL;
1118 return (optlen);
1119}
1120
1121/*
1122 * IP socket option processing.
1123 */
1124int
1125ip_ctloutput(so, sopt)
1126 struct socket *so;
1127 struct sockopt *sopt;
1128{
1129 struct inpcb *inp = sotoinpcb(so);
1130 int error, optval;
1131
1132 error = optval = 0;
1133 if (sopt->sopt_level != IPPROTO_IP) {
1134 return (EINVAL);
1135 }
1136
1137 switch (sopt->sopt_dir) {
1138 case SOPT_SET:
1139 switch (sopt->sopt_name) {
1140 case IP_OPTIONS:
1141#ifdef notyet
1142 case IP_RETOPTS:
1143#endif
1144 {
1145 struct mbuf *m;
1146 if (sopt->sopt_valsize > MLEN) {
1147 error = EMSGSIZE;
1148 break;
1149 }
1150 MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_HEADER);
1151 if (m == 0) {
1152 error = ENOBUFS;
1153 break;
1154 }
1155 m->m_len = sopt->sopt_valsize;
1156 error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1157 m->m_len);
1158
1159 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
1160 m));
1161 }
1162
1163 case IP_TOS:
1164 case IP_TTL:
1165 case IP_RECVOPTS:
1166 case IP_RECVRETOPTS:
1167 case IP_RECVDSTADDR:
1168 case IP_RECVIF:
1169#if defined(NFAITH) && NFAITH > 0
1170 case IP_FAITH:
1171#endif
1172 error = sooptcopyin(sopt, &optval, sizeof optval,
1173 sizeof optval);
1174 if (error)
1175 break;
1176
1177 switch (sopt->sopt_name) {
1178 case IP_TOS:
1179 inp->inp_ip_tos = optval;
1180 break;
1181
1182 case IP_TTL:
1183 inp->inp_ip_ttl = optval;
1184 break;
1185#define OPTSET(bit) \
1186 if (optval) \
1187 inp->inp_flags |= bit; \
1188 else \
1189 inp->inp_flags &= ~bit;
1190
1191 case IP_RECVOPTS:
1192 OPTSET(INP_RECVOPTS);
1193 break;
1194
1195 case IP_RECVRETOPTS:
1196 OPTSET(INP_RECVRETOPTS);
1197 break;
1198
1199 case IP_RECVDSTADDR:
1200 OPTSET(INP_RECVDSTADDR);
1201 break;
1202
1203 case IP_RECVIF:
1204 OPTSET(INP_RECVIF);
1205 break;
1206
1207#if defined(NFAITH) && NFAITH > 0
1208 case IP_FAITH:
1209 OPTSET(INP_FAITH);
1210 break;
1211#endif
1212 }
1213 break;
1214#undef OPTSET
1215
1216 case IP_MULTICAST_IF:
1217 case IP_MULTICAST_VIF:
1218 case IP_MULTICAST_TTL:
1219 case IP_MULTICAST_LOOP:
1220 case IP_ADD_MEMBERSHIP:
1221 case IP_DROP_MEMBERSHIP:
1222 error = ip_setmoptions(sopt, &inp->inp_moptions);
1223 break;
1224
1225 case IP_PORTRANGE:
1226 error = sooptcopyin(sopt, &optval, sizeof optval,
1227 sizeof optval);
1228 if (error)
1229 break;
1230
1231 switch (optval) {
1232 case IP_PORTRANGE_DEFAULT:
1233 inp->inp_flags &= ~(INP_LOWPORT);
1234 inp->inp_flags &= ~(INP_HIGHPORT);
1235 break;
1236
1237 case IP_PORTRANGE_HIGH:
1238 inp->inp_flags &= ~(INP_LOWPORT);
1239 inp->inp_flags |= INP_HIGHPORT;
1240 break;
1241
1242 case IP_PORTRANGE_LOW:
1243 inp->inp_flags &= ~(INP_HIGHPORT);
1244 inp->inp_flags |= INP_LOWPORT;
1245 break;
1246
1247 default:
1248 error = EINVAL;
1249 break;
1250 }
1251 break;
1252
1253#ifdef IPSEC
1254 case IP_IPSEC_POLICY:
1255 {
1256 caddr_t req;
1257 size_t len = 0;
1258 int priv;
1259 struct mbuf *m;
1260 int optname;
1261
1262 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1263 break;
1264 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1265 break;
1266 priv = (sopt->sopt_td != NULL &&
1267 suser_td(sopt->sopt_td) != 0) ? 0 : 1;
1268 req = mtod(m, caddr_t);
1269 len = m->m_len;
1270 optname = sopt->sopt_name;
1271 error = ipsec4_set_policy(inp, optname, req, len, priv);
1272 m_freem(m);
1273 break;
1274 }
1275#endif /*IPSEC*/
1276
1277 default:
1278 error = ENOPROTOOPT;
1279 break;
1280 }
1281 break;
1282
1283 case SOPT_GET:
1284 switch (sopt->sopt_name) {
1285 case IP_OPTIONS:
1286 case IP_RETOPTS:
1287 if (inp->inp_options)
1288 error = sooptcopyout(sopt,
1289 mtod(inp->inp_options,
1290 char *),
1291 inp->inp_options->m_len);
1292 else
1293 sopt->sopt_valsize = 0;
1294 break;
1295
1296 case IP_TOS:
1297 case IP_TTL:
1298 case IP_RECVOPTS:
1299 case IP_RECVRETOPTS:
1300 case IP_RECVDSTADDR:
1301 case IP_RECVIF:
1302 case IP_PORTRANGE:
1303#if defined(NFAITH) && NFAITH > 0
1304 case IP_FAITH:
1305#endif
1306 switch (sopt->sopt_name) {
1307
1308 case IP_TOS:
1309 optval = inp->inp_ip_tos;
1310 break;
1311
1312 case IP_TTL:
1313 optval = inp->inp_ip_ttl;
1314 break;
1315
1316#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1317
1318 case IP_RECVOPTS:
1319 optval = OPTBIT(INP_RECVOPTS);
1320 break;
1321
1322 case IP_RECVRETOPTS:
1323 optval = OPTBIT(INP_RECVRETOPTS);
1324 break;
1325
1326 case IP_RECVDSTADDR:
1327 optval = OPTBIT(INP_RECVDSTADDR);
1328 break;
1329
1330 case IP_RECVIF:
1331 optval = OPTBIT(INP_RECVIF);
1332 break;
1333
1334 case IP_PORTRANGE:
1335 if (inp->inp_flags & INP_HIGHPORT)
1336 optval = IP_PORTRANGE_HIGH;
1337 else if (inp->inp_flags & INP_LOWPORT)
1338 optval = IP_PORTRANGE_LOW;
1339 else
1340 optval = 0;
1341 break;
1342
1343#if defined(NFAITH) && NFAITH > 0
1344 case IP_FAITH:
1345 optval = OPTBIT(INP_FAITH);
1346 break;
1347#endif
1348 }
1349 error = sooptcopyout(sopt, &optval, sizeof optval);
1350 break;
1351
1352 case IP_MULTICAST_IF:
1353 case IP_MULTICAST_VIF:
1354 case IP_MULTICAST_TTL:
1355 case IP_MULTICAST_LOOP:
1356 case IP_ADD_MEMBERSHIP:
1357 case IP_DROP_MEMBERSHIP:
1358 error = ip_getmoptions(sopt, inp->inp_moptions);
1359 break;
1360
1361#ifdef IPSEC
1362 case IP_IPSEC_POLICY:
1363 {
1364 struct mbuf *m = NULL;
1365 caddr_t req = NULL;
1366 size_t len = 0;
1367
1368 if (m != 0) {
1369 req = mtod(m, caddr_t);
1370 len = m->m_len;
1371 }
1372 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
1373 if (error == 0)
1374 error = soopt_mcopyout(sopt, m); /* XXX */
1375 if (error == 0)
1376 m_freem(m);
1377 break;
1378 }
1379#endif /*IPSEC*/
1380
1381 default:
1382 error = ENOPROTOOPT;
1383 break;
1384 }
1385 break;
1386 }
1387 return (error);
1388}
1389
1390/*
1391 * Set up IP options in pcb for insertion in output packets.
1392 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1393 * with destination address if source routed.
1394 */
1395static int
1396ip_pcbopts(optname, pcbopt, m)
1397 int optname;
1398 struct mbuf **pcbopt;
1399 register struct mbuf *m;
1400{
1401 register int cnt, optlen;
1402 register u_char *cp;
1403 u_char opt;
1404
1405 /* turn off any old options */
1406 if (*pcbopt)
1407 (void)m_free(*pcbopt);
1408 *pcbopt = 0;
1409 if (m == (struct mbuf *)0 || m->m_len == 0) {
1410 /*
1411 * Only turning off any previous options.
1412 */
1413 if (m)
1414 (void)m_free(m);
1415 return (0);
1416 }
1417
1418 if (m->m_len % sizeof(int32_t))
1419 goto bad;
1420 /*
1421 * IP first-hop destination address will be stored before
1422 * actual options; move other options back
1423 * and clear it when none present.
1424 */
1425 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1426 goto bad;
1427 cnt = m->m_len;
1428 m->m_len += sizeof(struct in_addr);
1429 cp = mtod(m, u_char *) + sizeof(struct in_addr);
1430 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
1431 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
1432
1433 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1434 opt = cp[IPOPT_OPTVAL];
1435 if (opt == IPOPT_EOL)
1436 break;
1437 if (opt == IPOPT_NOP)
1438 optlen = 1;
1439 else {
1440 if (cnt < IPOPT_OLEN + sizeof(*cp))
1441 goto bad;
1442 optlen = cp[IPOPT_OLEN];
1443 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1444 goto bad;
1445 }
1446 switch (opt) {
1447
1448 default:
1449 break;
1450
1451 case IPOPT_LSRR:
1452 case IPOPT_SSRR:
1453 /*
1454 * user process specifies route as:
1455 * ->A->B->C->D
1456 * D must be our final destination (but we can't
1457 * check that since we may not have connected yet).
1458 * A is first hop destination, which doesn't appear in
1459 * actual IP option, but is stored before the options.
1460 */
1461 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1462 goto bad;
1463 m->m_len -= sizeof(struct in_addr);
1464 cnt -= sizeof(struct in_addr);
1465 optlen -= sizeof(struct in_addr);
1466 cp[IPOPT_OLEN] = optlen;
1467 /*
1468 * Move first hop before start of options.
1469 */
1470 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1471 sizeof(struct in_addr));
1472 /*
1473 * Then copy rest of options back
1474 * to close up the deleted entry.
1475 */
1476 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
1477 sizeof(struct in_addr)),
1478 (caddr_t)&cp[IPOPT_OFFSET+1],
1479 (unsigned)cnt + sizeof(struct in_addr));
1480 break;
1481 }
1482 }
1483 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1484 goto bad;
1485 *pcbopt = m;
1486 return (0);
1487
1488bad:
1489 (void)m_free(m);
1490 return (EINVAL);
1491}
1492
1493/*
1494 * XXX
1495 * The whole multicast option thing needs to be re-thought.
1496 * Several of these options are equally applicable to non-multicast
1497 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
1498 * standard option (IP_TTL).
1499 */
1500
1501/*
1502 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
1503 */
1504static struct ifnet *
1505ip_multicast_if(a, ifindexp)
1506 struct in_addr *a;
1507 int *ifindexp;
1508{
1509 int ifindex;
1510 struct ifnet *ifp;
1511
1512 if (ifindexp)
1513 *ifindexp = 0;
1514 if (ntohl(a->s_addr) >> 24 == 0) {
1515 ifindex = ntohl(a->s_addr) & 0xffffff;
1516 if (ifindex < 0 || if_index < ifindex)
1517 return NULL;
1518 ifp = ifnet_byindex(ifindex);
1519 if (ifindexp)
1520 *ifindexp = ifindex;
1521 } else {
1522 INADDR_TO_IFP(*a, ifp);
1523 }
1524 return ifp;
1525}
1526
1527/*
1528 * Set the IP multicast options in response to user setsockopt().
1529 */
1530static int
1531ip_setmoptions(sopt, imop)
1532 struct sockopt *sopt;
1533 struct ip_moptions **imop;
1534{
1535 int error = 0;
1536 int i;
1537 struct in_addr addr;
1538 struct ip_mreq mreq;
1539 struct ifnet *ifp;
1540 struct ip_moptions *imo = *imop;
1541 struct route ro;
1542 struct sockaddr_in *dst;
1543 int ifindex;
1544 int s;
1545
1546 if (imo == NULL) {
1547 /*
1548 * No multicast option buffer attached to the pcb;
1549 * allocate one and initialize to default values.
1550 */
1551 imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
1552 M_WAITOK);
1553
1554 if (imo == NULL)
1555 return (ENOBUFS);
1556 *imop = imo;
1557 imo->imo_multicast_ifp = NULL;
1558 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1559 imo->imo_multicast_vif = -1;
1560 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1561 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1562 imo->imo_num_memberships = 0;
1563 }
1564
1565 switch (sopt->sopt_name) {
1566 /* store an index number for the vif you wanna use in the send */
1567 case IP_MULTICAST_VIF:
1568 if (legal_vif_num == 0) {
1569 error = EOPNOTSUPP;
1570 break;
1571 }
1572 error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
1573 if (error)
1574 break;
1575 if (!legal_vif_num(i) && (i != -1)) {
1576 error = EINVAL;
1577 break;
1578 }
1579 imo->imo_multicast_vif = i;
1580 break;
1581
1582 case IP_MULTICAST_IF:
1583 /*
1584 * Select the interface for outgoing multicast packets.
1585 */
1586 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
1587 if (error)
1588 break;
1589 /*
1590 * INADDR_ANY is used to remove a previous selection.
1591 * When no interface is selected, a default one is
1592 * chosen every time a multicast packet is sent.
1593 */
1594 if (addr.s_addr == INADDR_ANY) {
1595 imo->imo_multicast_ifp = NULL;
1596 break;
1597 }
1598 /*
1599 * The selected interface is identified by its local
1600 * IP address. Find the interface and confirm that
1601 * it supports multicasting.
1602 */
1603 s = splimp();
1604 ifp = ip_multicast_if(&addr, &ifindex);
1605 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1606 splx(s);
1607 error = EADDRNOTAVAIL;
1608 break;
1609 }
1610 imo->imo_multicast_ifp = ifp;
1611 if (ifindex)
1612 imo->imo_multicast_addr = addr;
1613 else
1614 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1615 splx(s);
1616 break;
1617
1618 case IP_MULTICAST_TTL:
1619 /*
1620 * Set the IP time-to-live for outgoing multicast packets.
1621 * The original multicast API required a char argument,
1622 * which is inconsistent with the rest of the socket API.
1623 * We allow either a char or an int.
1624 */
1625 if (sopt->sopt_valsize == 1) {
1626 u_char ttl;
1627 error = sooptcopyin(sopt, &ttl, 1, 1);
1628 if (error)
1629 break;
1630 imo->imo_multicast_ttl = ttl;
1631 } else {
1632 u_int ttl;
1633 error = sooptcopyin(sopt, &ttl, sizeof ttl,
1634 sizeof ttl);
1635 if (error)
1636 break;
1637 if (ttl > 255)
1638 error = EINVAL;
1639 else
1640 imo->imo_multicast_ttl = ttl;
1641 }
1642 break;
1643
1644 case IP_MULTICAST_LOOP:
1645 /*
1646 * Set the loopback flag for outgoing multicast packets.
1647 * Must be zero or one. The original multicast API required a
1648 * char argument, which is inconsistent with the rest
1649 * of the socket API. We allow either a char or an int.
1650 */
1651 if (sopt->sopt_valsize == 1) {
1652 u_char loop;
1653 error = sooptcopyin(sopt, &loop, 1, 1);
1654 if (error)
1655 break;
1656 imo->imo_multicast_loop = !!loop;
1657 } else {
1658 u_int loop;
1659 error = sooptcopyin(sopt, &loop, sizeof loop,
1660 sizeof loop);
1661 if (error)
1662 break;
1663 imo->imo_multicast_loop = !!loop;
1664 }
1665 break;
1666
1667 case IP_ADD_MEMBERSHIP:
1668 /*
1669 * Add a multicast group membership.
1670 * Group must be a valid IP multicast address.
1671 */
1672 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1673 if (error)
1674 break;
1675
1676 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1677 error = EINVAL;
1678 break;
1679 }
1680 s = splimp();
1681 /*
1682 * If no interface address was provided, use the interface of
1683 * the route to the given multicast address.
1684 */
1685 if (mreq.imr_interface.s_addr == INADDR_ANY) {
1686 bzero((caddr_t)&ro, sizeof(ro));
1687 dst = (struct sockaddr_in *)&ro.ro_dst;
1688 dst->sin_len = sizeof(*dst);
1689 dst->sin_family = AF_INET;
1690 dst->sin_addr = mreq.imr_multiaddr;
1691 rtalloc(&ro);
1692 if (ro.ro_rt == NULL) {
1693 error = EADDRNOTAVAIL;
1694 splx(s);
1695 break;
1696 }
1697 ifp = ro.ro_rt->rt_ifp;
1698 rtfree(ro.ro_rt);
1699 }
1700 else {
1701 ifp = ip_multicast_if(&mreq.imr_interface, NULL);
1702 }
1703
1704 /*
1705 * See if we found an interface, and confirm that it
1706 * supports multicast.
1707 */
1708 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1709 error = EADDRNOTAVAIL;
1710 splx(s);
1711 break;
1712 }
1713 /*
1714 * See if the membership already exists or if all the
1715 * membership slots are full.
1716 */
1717 for (i = 0; i < imo->imo_num_memberships; ++i) {
1718 if (imo->imo_membership[i]->inm_ifp == ifp &&
1719 imo->imo_membership[i]->inm_addr.s_addr
1720 == mreq.imr_multiaddr.s_addr)
1721 break;
1722 }
1723 if (i < imo->imo_num_memberships) {
1724 error = EADDRINUSE;
1725 splx(s);
1726 break;
1727 }
1728 if (i == IP_MAX_MEMBERSHIPS) {
1729 error = ETOOMANYREFS;
1730 splx(s);
1731 break;
1732 }
1733 /*
1734 * Everything looks good; add a new record to the multicast
1735 * address list for the given interface.
1736 */
1737 if ((imo->imo_membership[i] =
1738 in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
1739 error = ENOBUFS;
1740 splx(s);
1741 break;
1742 }
1743 ++imo->imo_num_memberships;
1744 splx(s);
1745 break;
1746
1747 case IP_DROP_MEMBERSHIP:
1748 /*
1749 * Drop a multicast group membership.
1750 * Group must be a valid IP multicast address.
1751 */
1752 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1753 if (error)
1754 break;
1755
1756 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1757 error = EINVAL;
1758 break;
1759 }
1760
1761 s = splimp();
1762 /*
1763 * If an interface address was specified, get a pointer
1764 * to its ifnet structure.
1765 */
1766 if (mreq.imr_interface.s_addr == INADDR_ANY)
1767 ifp = NULL;
1768 else {
1769 ifp = ip_multicast_if(&mreq.imr_interface, NULL);
1770 if (ifp == NULL) {
1771 error = EADDRNOTAVAIL;
1772 splx(s);
1773 break;
1774 }
1775 }
1776 /*
1777 * Find the membership in the membership array.
1778 */
1779 for (i = 0; i < imo->imo_num_memberships; ++i) {
1780 if ((ifp == NULL ||
1781 imo->imo_membership[i]->inm_ifp == ifp) &&
1782 imo->imo_membership[i]->inm_addr.s_addr ==
1783 mreq.imr_multiaddr.s_addr)
1784 break;
1785 }
1786 if (i == imo->imo_num_memberships) {
1787 error = EADDRNOTAVAIL;
1788 splx(s);
1789 break;
1790 }
1791 /*
1792 * Give up the multicast address record to which the
1793 * membership points.
1794 */
1795 in_delmulti(imo->imo_membership[i]);
1796 /*
1797 * Remove the gap in the membership array.
1798 */
1799 for (++i; i < imo->imo_num_memberships; ++i)
1800 imo->imo_membership[i-1] = imo->imo_membership[i];
1801 --imo->imo_num_memberships;
1802 splx(s);
1803 break;
1804
1805 default:
1806 error = EOPNOTSUPP;
1807 break;
1808 }
1809
1810 /*
1811 * If all options have default values, no need to keep the mbuf.
1812 */
1813 if (imo->imo_multicast_ifp == NULL &&
1814 imo->imo_multicast_vif == -1 &&
1815 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1816 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1817 imo->imo_num_memberships == 0) {
1818 free(*imop, M_IPMOPTS);
1819 *imop = NULL;
1820 }
1821
1822 return (error);
1823}
1824
1825/*
1826 * Return the IP multicast options in response to user getsockopt().
1827 */
1828static int
1829ip_getmoptions(sopt, imo)
1830 struct sockopt *sopt;
1831 register struct ip_moptions *imo;
1832{
1833 struct in_addr addr;
1834 struct in_ifaddr *ia;
1835 int error, optval;
1836 u_char coptval;
1837
1838 error = 0;
1839 switch (sopt->sopt_name) {
1840 case IP_MULTICAST_VIF:
1841 if (imo != NULL)
1842 optval = imo->imo_multicast_vif;
1843 else
1844 optval = -1;
1845 error = sooptcopyout(sopt, &optval, sizeof optval);
1846 break;
1847
1848 case IP_MULTICAST_IF:
1849 if (imo == NULL || imo->imo_multicast_ifp == NULL)
1850 addr.s_addr = INADDR_ANY;
1851 else if (imo->imo_multicast_addr.s_addr) {
1852 /* return the value user has set */
1853 addr = imo->imo_multicast_addr;
1854 } else {
1855 IFP_TO_IA(imo->imo_multicast_ifp, ia);
1856 addr.s_addr = (ia == NULL) ? INADDR_ANY
1857 : IA_SIN(ia)->sin_addr.s_addr;
1858 }
1859 error = sooptcopyout(sopt, &addr, sizeof addr);
1860 break;
1861
1862 case IP_MULTICAST_TTL:
1863 if (imo == 0)
1864 optval = coptval = IP_DEFAULT_MULTICAST_TTL;
1865 else
1866 optval = coptval = imo->imo_multicast_ttl;
1867 if (sopt->sopt_valsize == 1)
1868 error = sooptcopyout(sopt, &coptval, 1);
1869 else
1870 error = sooptcopyout(sopt, &optval, sizeof optval);
1871 break;
1872
1873 case IP_MULTICAST_LOOP:
1874 if (imo == 0)
1875 optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
1876 else
1877 optval = coptval = imo->imo_multicast_loop;
1878 if (sopt->sopt_valsize == 1)
1879 error = sooptcopyout(sopt, &coptval, 1);
1880 else
1881 error = sooptcopyout(sopt, &optval, sizeof optval);
1882 break;
1883
1884 default:
1885 error = ENOPROTOOPT;
1886 break;
1887 }
1888 return (error);
1889}
1890
1891/*
1892 * Discard the IP multicast options.
1893 */
1894void
1895ip_freemoptions(imo)
1896 register struct ip_moptions *imo;
1897{
1898 register int i;
1899
1900 if (imo != NULL) {
1901 for (i = 0; i < imo->imo_num_memberships; ++i)
1902 in_delmulti(imo->imo_membership[i]);
1903 free(imo, M_IPMOPTS);
1904 }
1905}
1906
1907/*
1908 * Routine called from ip_output() to loop back a copy of an IP multicast
1909 * packet to the input queue of a specified interface. Note that this
1910 * calls the output routine of the loopback "driver", but with an interface
1911 * pointer that might NOT be a loopback interface -- evil, but easier than
1912 * replicating that code here.
1913 */
1914static void
1915ip_mloopback(ifp, m, dst, hlen)
1916 struct ifnet *ifp;
1917 register struct mbuf *m;
1918 register struct sockaddr_in *dst;
1919 int hlen;
1920{
1921 register struct ip *ip;
1922 struct mbuf *copym;
1923
1924 copym = m_copy(m, 0, M_COPYALL);
1925 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
1926 copym = m_pullup(copym, hlen);
1927 if (copym != NULL) {
1928 /*
1929 * We don't bother to fragment if the IP length is greater
1930 * than the interface's MTU. Can this possibly matter?
1931 */
1932 ip = mtod(copym, struct ip *);
1933 HTONS(ip->ip_len);
1934 HTONS(ip->ip_off);
1935 ip->ip_sum = 0;
1936 if (ip->ip_vhl == IP_VHL_BORING) {
1937 ip->ip_sum = in_cksum_hdr(ip);
1938 } else {
1939 ip->ip_sum = in_cksum(copym, hlen);
1940 }
1941 /*
1942 * NB:
1943 * It's not clear whether there are any lingering
1944 * reentrancy problems in other areas which might
1945 * be exposed by using ip_input directly (in
1946 * particular, everything which modifies the packet
1947 * in-place). Yet another option is using the
1948 * protosw directly to deliver the looped back
1949 * packet. For the moment, we'll err on the side
1950 * of safety by using if_simloop().
1951 */
1952#if 1 /* XXX */
1953 if (dst->sin_family != AF_INET) {
1954 printf("ip_mloopback: bad address family %d\n",
1955 dst->sin_family);
1956 dst->sin_family = AF_INET;
1957 }
1958#endif
1959
1960#ifdef notdef
1961 copym->m_pkthdr.rcvif = ifp;
1962 ip_input(copym);
1963#else
1964 /* if the checksum hasn't been computed, mark it as valid */
1965 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1966 copym->m_pkthdr.csum_flags |=
1967 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1968 copym->m_pkthdr.csum_data = 0xffff;
1969 }
1970 if_simloop(ifp, copym, dst->sin_family, 0);
1971#endif
1972 }
1973}