Deleted Added
sdiff udiff text old ( 83366 ) new ( 83934 )
full compact
1/*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
34 * $FreeBSD: head/sys/netinet/ip_output.c 83934 2001-09-25 18:40:52Z brooks $
35 */
36
37#define _IP_VHL
38
39#include "opt_ipfw.h"
40#include "opt_ipdn.h"
41#include "opt_ipdivert.h"
42#include "opt_ipfilter.h"
43#include "opt_ipsec.h"
44#include "opt_pfil_hooks.h"
45#include "opt_random_ip_id.h"
46
47#include <sys/param.h>
48#include <sys/systm.h>
49#include <sys/kernel.h>
50#include <sys/malloc.h>
51#include <sys/mbuf.h>
52#include <sys/protosw.h>
53#include <sys/socket.h>
54#include <sys/socketvar.h>
55
56#include <net/if.h>
57#include <net/route.h>
58
59#include <netinet/in.h>
60#include <netinet/in_systm.h>
61#include <netinet/ip.h>
62#include <netinet/in_pcb.h>
63#include <netinet/in_var.h>
64#include <netinet/ip_var.h>
65
66#include <machine/in_cksum.h>
67
68static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
69
70#ifdef IPSEC
71#include <netinet6/ipsec.h>
72#include <netkey/key.h>
73#ifdef IPSEC_DEBUG
74#include <netkey/key_debug.h>
75#else
76#define KEYDEBUG(lev,arg)
77#endif
78#endif /*IPSEC*/
79
80#include <netinet/ip_fw.h>
81
82#ifdef DUMMYNET
83#include <netinet/ip_dummynet.h>
84#endif
85
#ifdef IPFIREWALL_FORWARD_DEBUG
/*
 * Debug helper: print an IPv4 address in dotted-quad form.
 * "a" is a struct in_addr whose s_addr is converted with ntohl() before
 * the four octets are extracted.
 *
 * The macro is a single statement (do { } while (0)) so that it can be
 * used safely in if/else bodies, it evaluates its argument only once,
 * and the octets are printed as unsigned long to match the format.
 */
#define print_ip(a)	do {						\
	unsigned long print_ip_host =					\
	    (unsigned long)ntohl((a).s_addr);				\
	printf("%lu.%lu.%lu.%lu",					\
	    (print_ip_host >> 24) & 0xFF,				\
	    (print_ip_host >> 16) & 0xFF,				\
	    (print_ip_host >> 8) & 0xFF,				\
	    print_ip_host & 0xFF);					\
} while (0)
#endif
92
/*
 * Counter used by ip_output() to fill in ip_id for locally generated
 * packets when RANDOM_IP_ID is not defined.
 */
93u_short ip_id;
94
/* Helpers private to this file. */
95static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
96static struct ifnet *ip_multicast_if __P((struct in_addr *, int *));
97static void ip_mloopback
98 __P((struct ifnet *, struct mbuf *, struct sockaddr_in *, int));
99static int ip_getmoptions
100 __P((struct sockopt *, struct ip_moptions *));
101static int ip_pcbopts __P((int, struct mbuf **, struct mbuf *));
102static int ip_setmoptions
103 __P((struct sockopt *, struct ip_moptions **));
104
/* Not static: option copier used by ip_output() when building fragments. */
105int ip_optcopy __P((struct ip *, struct ip *));
106
107
/* Protocol switch table; ip_output() reads its pfil hook list (PFIL_HOOKS). */
108extern struct protosw inetsw[];
109
110/*
111 * IP output. The packet in mbuf chain m contains a skeletal IP
112 * header (with len, off, ttl, proto, tos, src, dst).
113 * The mbuf chain containing the packet will be freed.
114 * The mbuf opt, if present, will not be freed.
115 */
116int
117ip_output(m0, opt, ro, flags, imo)
118 struct mbuf *m0;
119 struct mbuf *opt;
120 struct route *ro;
121 int flags;
122 struct ip_moptions *imo;
123{
124 struct ip *ip, *mhip;
125 struct ifnet *ifp;
126 struct mbuf *m = m0;
127 int hlen = sizeof (struct ip);
128 int len, off, error = 0;
129 struct sockaddr_in *dst;
130 struct in_ifaddr *ia;
131 int isbroadcast, sw_csum;
132#ifdef IPSEC
133 struct route iproute;
134 struct socket *so = NULL;
135 struct secpolicy *sp = NULL;
136#endif
137 u_int16_t divert_cookie; /* firewall cookie */
138#ifdef PFIL_HOOKS
139 struct packet_filter_hook *pfh;
140 struct mbuf *m1;
141 int rv;
142#endif /* PFIL_HOOKS */
143#ifdef IPFIREWALL_FORWARD
144 int fwd_rewrite_src = 0;
145#endif
146 struct ip_fw_chain *rule = NULL;
147
148#ifdef IPDIVERT
149 /* Get and reset firewall cookie */
150 divert_cookie = ip_divert_cookie;
151 ip_divert_cookie = 0;
152#else
153 divert_cookie = 0;
154#endif
155
156#if defined(IPFIREWALL) && defined(DUMMYNET)
157 /*
158 * dummynet packet are prepended a vestigial mbuf with
159 * m_type = MT_DUMMYNET and m_data pointing to the matching
160 * rule.
161 */
162 if (m->m_type == MT_DUMMYNET) {
163 /*
164 * the packet was already tagged, so part of the
165 * processing was already done, and we need to go down.
166 * Get parameters from the header.
167 */
168 rule = (struct ip_fw_chain *)(m->m_data) ;
169 opt = NULL ;
170 ro = & ( ((struct dn_pkt *)m)->ro ) ;
171 imo = NULL ;
172 dst = ((struct dn_pkt *)m)->dn_dst ;
173 ifp = ((struct dn_pkt *)m)->ifp ;
174 flags = ((struct dn_pkt *)m)->flags ;
175
176 m0 = m = m->m_next ;
177#ifdef IPSEC
178 so = ipsec_getsocket(m);
179 (void)ipsec_setsocket(m, NULL);
180#endif
181 ip = mtod(m, struct ip *);
182 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
183 ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa;
184 goto sendit;
185 } else
186 rule = NULL ;
187#endif
188#ifdef IPSEC
189 so = ipsec_getsocket(m);
190 (void)ipsec_setsocket(m, NULL);
191#endif
192
193#ifdef DIAGNOSTIC
194 if ((m->m_flags & M_PKTHDR) == 0)
195 panic("ip_output no HDR");
196 if (!ro)
197 panic("ip_output no route, proto = %d",
198 mtod(m, struct ip *)->ip_p);
199#endif
200 if (opt) {
201 m = ip_insertoptions(m, opt, &len);
202 hlen = len;
203 }
204 ip = mtod(m, struct ip *);
205 /*
206 * Fill in IP header.
207 */
208 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
209 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
210 ip->ip_off &= IP_DF;
211#ifdef RANDOM_IP_ID
212 ip->ip_id = ip_randomid();
213#else
214 ip->ip_id = htons(ip_id++);
215#endif
216 ipstat.ips_localout++;
217 } else {
218 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
219 }
220
221 dst = (struct sockaddr_in *)&ro->ro_dst;
222 /*
223 * If there is a cached route,
224 * check that it is to the same destination
225 * and is still up. If not, free it and try again.
226 */
227 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
228 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
229 RTFREE(ro->ro_rt);
230 ro->ro_rt = (struct rtentry *)0;
231 }
232 if (ro->ro_rt == 0) {
233 dst->sin_family = AF_INET;
234 dst->sin_len = sizeof(*dst);
235 dst->sin_addr = ip->ip_dst;
236 }
237 /*
238 * If routing to interface only,
239 * short circuit routing lookup.
240 */
241#define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
242#define sintosa(sin) ((struct sockaddr *)(sin))
243 if (flags & IP_ROUTETOIF) {
244 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
245 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
246 ipstat.ips_noroute++;
247 error = ENETUNREACH;
248 goto bad;
249 }
250 ifp = ia->ia_ifp;
251 ip->ip_ttl = 1;
252 isbroadcast = in_broadcast(dst->sin_addr, ifp);
253 } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
254 imo != NULL && imo->imo_multicast_ifp != NULL) {
255 /*
256 * Bypass the normal routing lookup for multicast
257 * packets if the interface is specified.
258 */
259 ifp = imo->imo_multicast_ifp;
260 IFP_TO_IA(ifp, ia);
261 isbroadcast = 0; /* fool gcc */
262 } else {
263 /*
264 * If this is the case, we probably don't want to allocate
265 * a protocol-cloned route since we didn't get one from the
266 * ULP. This lets TCP do its thing, while not burdening
267 * forwarding or ICMP with the overhead of cloning a route.
268 * Of course, we still want to do any cloning requested by
269 * the link layer, as this is probably required in all cases
270 * for correct operation (as it is for ARP).
271 */
272 if (ro->ro_rt == 0)
273 rtalloc_ign(ro, RTF_PRCLONING);
274 if (ro->ro_rt == 0) {
275 ipstat.ips_noroute++;
276 error = EHOSTUNREACH;
277 goto bad;
278 }
279 ia = ifatoia(ro->ro_rt->rt_ifa);
280 ifp = ro->ro_rt->rt_ifp;
281 ro->ro_rt->rt_use++;
282 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
283 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
284 if (ro->ro_rt->rt_flags & RTF_HOST)
285 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
286 else
287 isbroadcast = in_broadcast(dst->sin_addr, ifp);
288 }
289 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
290 struct in_multi *inm;
291
292 m->m_flags |= M_MCAST;
293 /*
294 * IP destination address is multicast. Make sure "dst"
295 * still points to the address in "ro". (It may have been
296 * changed to point to a gateway address, above.)
297 */
298 dst = (struct sockaddr_in *)&ro->ro_dst;
299 /*
300 * See if the caller provided any multicast options
301 */
302 if (imo != NULL) {
303 ip->ip_ttl = imo->imo_multicast_ttl;
304 if (imo->imo_multicast_vif != -1)
305 ip->ip_src.s_addr =
306 ip_mcast_src(imo->imo_multicast_vif);
307 } else
308 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
309 /*
310 * Confirm that the outgoing interface supports multicast.
311 */
312 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
313 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
314 ipstat.ips_noroute++;
315 error = ENETUNREACH;
316 goto bad;
317 }
318 }
319 /*
320 * If source address not specified yet, use address
321 * of outgoing interface.
322 */
323 if (ip->ip_src.s_addr == INADDR_ANY) {
324 /* Interface may have no addresses. */
325 if (ia != NULL)
326 ip->ip_src = IA_SIN(ia)->sin_addr;
327 }
328
329 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
330 if (inm != NULL &&
331 (imo == NULL || imo->imo_multicast_loop)) {
332 /*
333 * If we belong to the destination multicast group
334 * on the outgoing interface, and the caller did not
335 * forbid loopback, loop back a copy.
336 */
337 ip_mloopback(ifp, m, dst, hlen);
338 }
339 else {
340 /*
341 * If we are acting as a multicast router, perform
342 * multicast forwarding as if the packet had just
343 * arrived on the interface to which we are about
344 * to send. The multicast forwarding function
345 * recursively calls this function, using the
346 * IP_FORWARDING flag to prevent infinite recursion.
347 *
348 * Multicasts that are looped back by ip_mloopback(),
349 * above, will be forwarded by the ip_input() routine,
350 * if necessary.
351 */
352 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
353 /*
354 * Check if rsvp daemon is running. If not, don't
355 * set ip_moptions. This ensures that the packet
356 * is multicast and not just sent down one link
357 * as prescribed by rsvpd.
358 */
359 if (!rsvp_on)
360 imo = NULL;
361 if (ip_mforward(ip, ifp, m, imo) != 0) {
362 m_freem(m);
363 goto done;
364 }
365 }
366 }
367
368 /*
369 * Multicasts with a time-to-live of zero may be looped-
370 * back, above, but must not be transmitted on a network.
371 * Also, multicasts addressed to the loopback interface
372 * are not sent -- the above call to ip_mloopback() will
373 * loop back a copy if this host actually belongs to the
374 * destination group on the loopback interface.
375 */
376 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
377 m_freem(m);
378 goto done;
379 }
380
381 goto sendit;
382 }
383#ifndef notdef
384 /*
385 * If source address not specified yet, use address
386 * of outgoing interface.
387 */
388 if (ip->ip_src.s_addr == INADDR_ANY) {
389 /* Interface may have no addresses. */
390 if (ia != NULL) {
391 ip->ip_src = IA_SIN(ia)->sin_addr;
392#ifdef IPFIREWALL_FORWARD
393 /* Keep note that we did this - if the firewall changes
394 * the next-hop, our interface may change, changing the
395 * default source IP. It's a shame so much effort happens
396 * twice. Oh well.
397 */
398 fwd_rewrite_src++;
399#endif /* IPFIREWALL_FORWARD */
400 }
401 }
402#endif /* notdef */
403 /*
404 * Verify that we have any chance at all of being able to queue
405 * the packet or packet fragments
406 */
407 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
408 ifp->if_snd.ifq_maxlen) {
409 error = ENOBUFS;
410 goto bad;
411 }
412
413 /*
414 * Look for broadcast address and
415 * and verify user is allowed to send
416 * such a packet.
417 */
418 if (isbroadcast) {
419 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
420 error = EADDRNOTAVAIL;
421 goto bad;
422 }
423 if ((flags & IP_ALLOWBROADCAST) == 0) {
424 error = EACCES;
425 goto bad;
426 }
427 /* don't allow broadcast messages to be fragmented */
428 if ((u_short)ip->ip_len > ifp->if_mtu) {
429 error = EMSGSIZE;
430 goto bad;
431 }
432 m->m_flags |= M_BCAST;
433 } else {
434 m->m_flags &= ~M_BCAST;
435 }
436
437sendit:
438#ifdef IPSEC
439 /* get SP for this packet */
440 if (so == NULL)
441 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
442 else
443 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
444
445 if (sp == NULL) {
446 ipsecstat.out_inval++;
447 goto bad;
448 }
449
450 error = 0;
451
452 /* check policy */
453 switch (sp->policy) {
454 case IPSEC_POLICY_DISCARD:
455 /*
456 * This packet is just discarded.
457 */
458 ipsecstat.out_polvio++;
459 goto bad;
460
461 case IPSEC_POLICY_BYPASS:
462 case IPSEC_POLICY_NONE:
463 /* no need to do IPsec. */
464 goto skip_ipsec;
465
466 case IPSEC_POLICY_IPSEC:
467 if (sp->req == NULL) {
468 /* acquire a policy */
469 error = key_spdacquire(sp);
470 goto bad;
471 }
472 break;
473
474 case IPSEC_POLICY_ENTRUST:
475 default:
476 printf("ip_output: Invalid policy found. %d\n", sp->policy);
477 }
478 {
479 struct ipsec_output_state state;
480 bzero(&state, sizeof(state));
481 state.m = m;
482 if (flags & IP_ROUTETOIF) {
483 state.ro = &iproute;
484 bzero(&iproute, sizeof(iproute));
485 } else
486 state.ro = ro;
487 state.dst = (struct sockaddr *)dst;
488
489 ip->ip_sum = 0;
490
491 /*
492 * XXX
493 * delayed checksums are not currently compatible with IPsec
494 */
495 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
496 in_delayed_cksum(m);
497 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
498 }
499
500 HTONS(ip->ip_len);
501 HTONS(ip->ip_off);
502
503 error = ipsec4_output(&state, sp, flags);
504
505 m = state.m;
506 if (flags & IP_ROUTETOIF) {
507 /*
508 * if we have tunnel mode SA, we may need to ignore
509 * IP_ROUTETOIF.
510 */
511 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
512 flags &= ~IP_ROUTETOIF;
513 ro = state.ro;
514 }
515 } else
516 ro = state.ro;
517 dst = (struct sockaddr_in *)state.dst;
518 if (error) {
519 /* mbuf is already reclaimed in ipsec4_output. */
520 m0 = NULL;
521 switch (error) {
522 case EHOSTUNREACH:
523 case ENETUNREACH:
524 case EMSGSIZE:
525 case ENOBUFS:
526 case ENOMEM:
527 break;
528 default:
529 printf("ip4_output (ipsec): error code %d\n", error);
530 /*fall through*/
531 case ENOENT:
532 /* don't show these error codes to the user */
533 error = 0;
534 break;
535 }
536 goto bad;
537 }
538 }
539
540 /* be sure to update variables that are affected by ipsec4_output() */
541 ip = mtod(m, struct ip *);
542#ifdef _IP_VHL
543 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
544#else
545 hlen = ip->ip_hl << 2;
546#endif
547 if (ro->ro_rt == NULL) {
548 if ((flags & IP_ROUTETOIF) == 0) {
549 printf("ip_output: "
550 "can't update route after IPsec processing\n");
551 error = EHOSTUNREACH; /*XXX*/
552 goto bad;
553 }
554 } else {
555 ia = ifatoia(ro->ro_rt->rt_ifa);
556 ifp = ro->ro_rt->rt_ifp;
557 }
558
559 /* make it flipped, again. */
560 NTOHS(ip->ip_len);
561 NTOHS(ip->ip_off);
562skip_ipsec:
563#endif /*IPSEC*/
564
565 /*
566 * IpHack's section.
567 * - Xlate: translate packet's addr/port (NAT).
568 * - Firewall: deny/allow/etc.
569 * - Wrap: fake packet's addr/port <unimpl.>
570 * - Encapsulate: put it in another IP and send out. <unimp.>
571 */
572#ifdef PFIL_HOOKS
573 /*
574 * Run through list of hooks for output packets.
575 */
576 m1 = m;
577 pfh = pfil_hook_get(PFIL_OUT, &inetsw[ip_protox[IPPROTO_IP]].pr_pfh);
578 for (; pfh; pfh = TAILQ_NEXT(pfh, pfil_link))
579 if (pfh->pfil_func) {
580 rv = pfh->pfil_func(ip, hlen, ifp, 1, &m1);
581 if (rv) {
582 error = EHOSTUNREACH;
583 goto done;
584 }
585 m = m1;
586 if (m == NULL)
587 goto done;
588 ip = mtod(m, struct ip *);
589 }
590#endif /* PFIL_HOOKS */
591
592 /*
593 * Check with the firewall...
594 */
595 if (fw_enable && ip_fw_chk_ptr) {
596 struct sockaddr_in *old = dst;
597
598 off = (*ip_fw_chk_ptr)(&ip,
599 hlen, ifp, &divert_cookie, &m, &rule, &dst);
600 /*
601 * On return we must do the following:
602 * m == NULL -> drop the pkt (old interface, deprecated)
603 * (off & 0x40000) -> drop the pkt (new interface)
604 * 1<=off<= 0xffff -> DIVERT
605 * (off & 0x10000) -> send to a DUMMYNET pipe
606 * (off & 0x20000) -> TEE the packet
607 * dst != old -> IPFIREWALL_FORWARD
608 * off==0, dst==old -> accept
609 * If some of the above modules is not compiled in, then
610 * we should't have to check the corresponding condition
611 * (because the ipfw control socket should not accept
612 * unsupported rules), but better play safe and drop
613 * packets in case of doubt.
614 */
615 if (off & IP_FW_PORT_DENY_FLAG) { /* XXX new interface-denied */
616 if (m)
617 m_freem(m);
618 error = EACCES ;
619 goto done;
620 }
621 if (!m) { /* firewall said to reject */
622 static int __debug=10;
623 if (__debug >0) {
624 printf("firewall returns NULL, please update!\n");
625 __debug-- ;
626 }
627 error = EACCES;
628 goto done;
629 }
630 if (off == 0 && dst == old) /* common case */
631 goto pass ;
632#ifdef DUMMYNET
633 if ((off & IP_FW_PORT_DYNT_FLAG) != 0) {
634 /*
635 * pass the pkt to dummynet. Need to include
636 * pipe number, m, ifp, ro, dst because these are
637 * not recomputed in the next pass.
638 * All other parameters have been already used and
639 * so they are not needed anymore.
640 * XXX note: if the ifp or ro entry are deleted
641 * while a pkt is in dummynet, we are in trouble!
642 */
643 error = dummynet_io(off & 0xffff, DN_TO_IP_OUT, m,
644 ifp,ro,dst,rule, flags);
645 goto done;
646 }
647#endif
648#ifdef IPDIVERT
649 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
650 struct mbuf *clone = NULL;
651
652 /* Clone packet if we're doing a 'tee' */
653 if ((off & IP_FW_PORT_TEE_FLAG) != 0)
654 clone = m_dup(m, M_DONTWAIT);
655
656 /*
657 * XXX
658 * delayed checksums are not currently compatible
659 * with divert sockets.
660 */
661 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
662 in_delayed_cksum(m);
663 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
664 }
665
666 /* Restore packet header fields to original values */
667 HTONS(ip->ip_len);
668 HTONS(ip->ip_off);
669
670 /* Deliver packet to divert input routine */
671 ip_divert_cookie = divert_cookie;
672 divert_packet(m, 0, off & 0xffff);
673
674 /* If 'tee', continue with original packet */
675 if (clone != NULL) {
676 m = clone;
677 ip = mtod(m, struct ip *);
678 goto pass;
679 }
680 goto done;
681 }
682#endif
683
684#ifdef IPFIREWALL_FORWARD
685 /* Here we check dst to make sure it's directly reachable on the
686 * interface we previously thought it was.
687 * If it isn't (which may be likely in some situations) we have
688 * to re-route it (ie, find a route for the next-hop and the
689 * associated interface) and set them here. This is nested
690 * forwarding which in most cases is undesirable, except where
691 * such control is nigh impossible. So we do it here.
692 * And I'm babbling.
693 */
694 if (off == 0 && old != dst) {
695 struct in_ifaddr *ia;
696
697 /* It's changed... */
698 /* There must be a better way to do this next line... */
699 static struct route sro_fwd, *ro_fwd = &sro_fwd;
700#ifdef IPFIREWALL_FORWARD_DEBUG
701 printf("IPFIREWALL_FORWARD: New dst ip: ");
702 print_ip(dst->sin_addr);
703 printf("\n");
704#endif
705 /*
706 * We need to figure out if we have been forwarded
707 * to a local socket. If so then we should somehow
708 * "loop back" to ip_input, and get directed to the
709 * PCB as if we had received this packet. This is
710 * because it may be dificult to identify the packets
711 * you want to forward until they are being output
712 * and have selected an interface. (e.g. locally
713 * initiated packets) If we used the loopback inteface,
714 * we would not be able to control what happens
715 * as the packet runs through ip_input() as
716 * it is done through a ISR.
717 */
718 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
719 /*
720 * If the addr to forward to is one
721 * of ours, we pretend to
722 * be the destination for this packet.
723 */
724 if (IA_SIN(ia)->sin_addr.s_addr ==
725 dst->sin_addr.s_addr)
726 break;
727 }
728 if (ia) {
729 /* tell ip_input "dont filter" */
730 ip_fw_fwd_addr = dst;
731 if (m->m_pkthdr.rcvif == NULL)
732 m->m_pkthdr.rcvif = ifunit("lo0");
733 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
734 m->m_pkthdr.csum_flags |=
735 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
736 m0->m_pkthdr.csum_data = 0xffff;
737 }
738 m->m_pkthdr.csum_flags |=
739 CSUM_IP_CHECKED | CSUM_IP_VALID;
740 HTONS(ip->ip_len);
741 HTONS(ip->ip_off);
742 ip_input(m);
743 goto done;
744 }
745 /* Some of the logic for this was
746 * nicked from above.
747 *
748 * This rewrites the cached route in a local PCB.
749 * Is this what we want to do?
750 */
751 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
752
753 ro_fwd->ro_rt = 0;
754 rtalloc_ign(ro_fwd, RTF_PRCLONING);
755
756 if (ro_fwd->ro_rt == 0) {
757 ipstat.ips_noroute++;
758 error = EHOSTUNREACH;
759 goto bad;
760 }
761
762 ia = ifatoia(ro_fwd->ro_rt->rt_ifa);
763 ifp = ro_fwd->ro_rt->rt_ifp;
764 ro_fwd->ro_rt->rt_use++;
765 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
766 dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
767 if (ro_fwd->ro_rt->rt_flags & RTF_HOST)
768 isbroadcast =
769 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
770 else
771 isbroadcast = in_broadcast(dst->sin_addr, ifp);
772 RTFREE(ro->ro_rt);
773 ro->ro_rt = ro_fwd->ro_rt;
774 dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
775
776 /*
777 * If we added a default src ip earlier,
778 * which would have been gotten from the-then
779 * interface, do it again, from the new one.
780 */
781 if (fwd_rewrite_src)
782 ip->ip_src = IA_SIN(ia)->sin_addr;
783 goto pass ;
784 }
785#endif /* IPFIREWALL_FORWARD */
786 /*
787 * if we get here, none of the above matches, and
788 * we have to drop the pkt
789 */
790 m_freem(m);
791 error = EACCES; /* not sure this is the right error msg */
792 goto done;
793 }
794
795pass:
796 m->m_pkthdr.csum_flags |= CSUM_IP;
797 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
798 if (sw_csum & CSUM_DELAY_DATA) {
799 in_delayed_cksum(m);
800 sw_csum &= ~CSUM_DELAY_DATA;
801 }
802 m->m_pkthdr.csum_flags &= ifp->if_hwassist;
803
804 /*
805 * If small enough for interface, or the interface will take
806 * care of the fragmentation for us, can just send directly.
807 */
808 if ((u_short)ip->ip_len <= ifp->if_mtu ||
809 ifp->if_hwassist & CSUM_FRAGMENT) {
810 HTONS(ip->ip_len);
811 HTONS(ip->ip_off);
812 ip->ip_sum = 0;
813 if (sw_csum & CSUM_DELAY_IP) {
814 if (ip->ip_vhl == IP_VHL_BORING) {
815 ip->ip_sum = in_cksum_hdr(ip);
816 } else {
817 ip->ip_sum = in_cksum(m, hlen);
818 }
819 }
820
821 /* Record statistics for this interface address. */
822 if (!(flags & IP_FORWARDING) && ia) {
823 ia->ia_ifa.if_opackets++;
824 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
825 }
826
827#ifdef IPSEC
828 /* clean ipsec history once it goes out of the node */
829 ipsec_delaux(m);
830#endif
831
832 error = (*ifp->if_output)(ifp, m,
833 (struct sockaddr *)dst, ro->ro_rt);
834 goto done;
835 }
836 /*
837 * Too large for interface; fragment if possible.
838 * Must be able to put at least 8 bytes per fragment.
839 */
840 if (ip->ip_off & IP_DF) {
841 error = EMSGSIZE;
842 /*
843 * This case can happen if the user changed the MTU
844 * of an interface after enabling IP on it. Because
845 * most netifs don't keep track of routes pointing to
846 * them, there is no way for one to update all its
847 * routes when the MTU is changed.
848 */
849 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
850 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
851 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
852 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
853 }
854 ipstat.ips_cantfrag++;
855 goto bad;
856 }
857 len = (ifp->if_mtu - hlen) &~ 7;
858 if (len < 8) {
859 error = EMSGSIZE;
860 goto bad;
861 }
862
863 /*
864 * if the interface will not calculate checksums on
865 * fragmented packets, then do it here.
866 */
867 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
868 (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) {
869 in_delayed_cksum(m);
870 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
871 }
872
873 {
874 int mhlen, firstlen = len;
875 struct mbuf **mnext = &m->m_nextpkt;
876 int nfrags = 1;
877
878 /*
879 * Loop through length of segment after first fragment,
880 * make new header and copy data of each part and link onto chain.
881 */
882 m0 = m;
883 mhlen = sizeof (struct ip);
884 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
885 MGETHDR(m, M_DONTWAIT, MT_HEADER);
886 if (m == 0) {
887 error = ENOBUFS;
888 ipstat.ips_odropped++;
889 goto sendorfree;
890 }
891 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
892 m->m_data += max_linkhdr;
893 mhip = mtod(m, struct ip *);
894 *mhip = *ip;
895 if (hlen > sizeof (struct ip)) {
896 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
897 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
898 }
899 m->m_len = mhlen;
900 mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
901 if (off + len >= (u_short)ip->ip_len)
902 len = (u_short)ip->ip_len - off;
903 else
904 mhip->ip_off |= IP_MF;
905 mhip->ip_len = htons((u_short)(len + mhlen));
906 m->m_next = m_copy(m0, off, len);
907 if (m->m_next == 0) {
908 (void) m_free(m);
909 error = ENOBUFS; /* ??? */
910 ipstat.ips_odropped++;
911 goto sendorfree;
912 }
913 m->m_pkthdr.len = mhlen + len;
914 m->m_pkthdr.rcvif = (struct ifnet *)0;
915 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
916 HTONS(mhip->ip_off);
917 mhip->ip_sum = 0;
918 if (sw_csum & CSUM_DELAY_IP) {
919 if (mhip->ip_vhl == IP_VHL_BORING) {
920 mhip->ip_sum = in_cksum_hdr(mhip);
921 } else {
922 mhip->ip_sum = in_cksum(m, mhlen);
923 }
924 }
925 *mnext = m;
926 mnext = &m->m_nextpkt;
927 nfrags++;
928 }
929 ipstat.ips_ofragments += nfrags;
930
931 /* set first/last markers for fragment chain */
932 m->m_flags |= M_LASTFRAG;
933 m0->m_flags |= M_FIRSTFRAG | M_FRAG;
934 m0->m_pkthdr.csum_data = nfrags;
935
936 /*
937 * Update first fragment by trimming what's been copied out
938 * and updating header, then send each fragment (in order).
939 */
940 m = m0;
941 m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
942 m->m_pkthdr.len = hlen + firstlen;
943 ip->ip_len = htons((u_short)m->m_pkthdr.len);
944 ip->ip_off |= IP_MF;
945 HTONS(ip->ip_off);
946 ip->ip_sum = 0;
947 if (sw_csum & CSUM_DELAY_IP) {
948 if (ip->ip_vhl == IP_VHL_BORING) {
949 ip->ip_sum = in_cksum_hdr(ip);
950 } else {
951 ip->ip_sum = in_cksum(m, hlen);
952 }
953 }
954sendorfree:
955 for (m = m0; m; m = m0) {
956 m0 = m->m_nextpkt;
957 m->m_nextpkt = 0;
958#ifdef IPSEC
959 /* clean ipsec history once it goes out of the node */
960 ipsec_delaux(m);
961#endif
962 if (error == 0) {
963 /* Record statistics for this interface address. */
964 if (ia != NULL) {
965 ia->ia_ifa.if_opackets++;
966 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
967 }
968
969 error = (*ifp->if_output)(ifp, m,
970 (struct sockaddr *)dst, ro->ro_rt);
971 } else
972 m_freem(m);
973 }
974
975 if (error == 0)
976 ipstat.ips_fragmented++;
977 }
978done:
979#ifdef IPSEC
980 if (ro == &iproute && ro->ro_rt) {
981 RTFREE(ro->ro_rt);
982 ro->ro_rt = NULL;
983 }
984 if (sp != NULL) {
985 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
986 printf("DP ip_output call free SP:%p\n", sp));
987 key_freesp(sp);
988 }
989#endif /* IPSEC */
990 return (error);
991bad:
992 m_freem(m0);
993 goto done;
994}
995
996void
997in_delayed_cksum(struct mbuf *m)
998{
999 struct ip *ip;
1000 u_short csum, offset;
1001
1002 ip = mtod(m, struct ip *);
1003 offset = IP_VHL_HL(ip->ip_vhl) << 2 ;
1004 csum = in_cksum_skip(m, ip->ip_len, offset);
1005 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
1006 csum = 0xffff;
1007 offset += m->m_pkthdr.csum_data; /* checksum offset */
1008
1009 if (offset + sizeof(u_short) > m->m_len) {
1010 printf("delayed m_pullup, m->len: %d off: %d p: %d\n",
1011 m->m_len, offset, ip->ip_p);
1012 /*
1013 * XXX
1014 * this shouldn't happen, but if it does, the
1015 * correct behavior may be to insert the checksum
1016 * in the existing chain instead of rearranging it.
1017 */
1018 m = m_pullup(m, offset + sizeof(u_short));
1019 }
1020 *(u_short *)(m->m_data + offset) = csum;
1021}
1022
1023/*
1024 * Insert IP options into preformed packet.
1025 * Adjust IP destination as required for IP source routing,
1026 * as indicated by a non-zero in_addr at the start of the options.
1027 *
1028 * XXX This routine assumes that the packet has no options in place.
1029 */
1030static struct mbuf *
1031ip_insertoptions(m, opt, phlen)
1032 register struct mbuf *m;
1033 struct mbuf *opt;
1034 int *phlen;
1035{
1036 register struct ipoption *p = mtod(opt, struct ipoption *);
1037 struct mbuf *n;
1038 register struct ip *ip = mtod(m, struct ip *);
1039 unsigned optlen;
1040
1041 optlen = opt->m_len - sizeof(p->ipopt_dst);
1042 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
1043 return (m); /* XXX should fail */
1044 if (p->ipopt_dst.s_addr)
1045 ip->ip_dst = p->ipopt_dst;
1046 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
1047 MGETHDR(n, M_DONTWAIT, MT_HEADER);
1048 if (n == 0)
1049 return (m);
1050 n->m_pkthdr.rcvif = (struct ifnet *)0;
1051 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
1052 m->m_len -= sizeof(struct ip);
1053 m->m_data += sizeof(struct ip);
1054 n->m_next = m;
1055 m = n;
1056 m->m_len = optlen + sizeof(struct ip);
1057 m->m_data += max_linkhdr;
1058 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
1059 } else {
1060 m->m_data -= optlen;
1061 m->m_len += optlen;
1062 m->m_pkthdr.len += optlen;
1063 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
1064 }
1065 ip = mtod(m, struct ip *);
1066 bcopy(p->ipopt_list, ip + 1, optlen);
1067 *phlen = sizeof(struct ip) + optlen;
1068 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
1069 ip->ip_len += optlen;
1070 return (m);
1071}
1072
1073/*
1074 * Copy options from ip to jp,
1075 * omitting those not copied during fragmentation.
1076 */
1077int
1078ip_optcopy(ip, jp)
1079 struct ip *ip, *jp;
1080{
1081 register u_char *cp, *dp;
1082 int opt, optlen, cnt;
1083
1084 cp = (u_char *)(ip + 1);
1085 dp = (u_char *)(jp + 1);
1086 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
1087 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1088 opt = cp[0];
1089 if (opt == IPOPT_EOL)
1090 break;
1091 if (opt == IPOPT_NOP) {
1092 /* Preserve for IP mcast tunnel's LSRR alignment. */
1093 *dp++ = IPOPT_NOP;
1094 optlen = 1;
1095 continue;
1096 }
1097#ifdef DIAGNOSTIC
1098 if (cnt < IPOPT_OLEN + sizeof(*cp))
1099 panic("malformed IPv4 option passed to ip_optcopy");
1100#endif
1101 optlen = cp[IPOPT_OLEN];
1102#ifdef DIAGNOSTIC
1103 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1104 panic("malformed IPv4 option passed to ip_optcopy");
1105#endif
1106 /* bogus lengths should have been caught by ip_dooptions */
1107 if (optlen > cnt)
1108 optlen = cnt;
1109 if (IPOPT_COPIED(opt)) {
1110 bcopy(cp, dp, optlen);
1111 dp += optlen;
1112 }
1113 }
1114 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1115 *dp++ = IPOPT_EOL;
1116 return (optlen);
1117}
1118
1119/*
1120 * IP socket option processing.
1121 */
1122int
1123ip_ctloutput(so, sopt)
1124 struct socket *so;
1125 struct sockopt *sopt;
1126{
1127 struct inpcb *inp = sotoinpcb(so);
1128 int error, optval;
1129
1130 error = optval = 0;
1131 if (sopt->sopt_level != IPPROTO_IP) {
1132 return (EINVAL);
1133 }
1134
1135 switch (sopt->sopt_dir) {
1136 case SOPT_SET:
1137 switch (sopt->sopt_name) {
1138 case IP_OPTIONS:
1139#ifdef notyet
1140 case IP_RETOPTS:
1141#endif
1142 {
1143 struct mbuf *m;
1144 if (sopt->sopt_valsize > MLEN) {
1145 error = EMSGSIZE;
1146 break;
1147 }
1148 MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_HEADER);
1149 if (m == 0) {
1150 error = ENOBUFS;
1151 break;
1152 }
1153 m->m_len = sopt->sopt_valsize;
1154 error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1155 m->m_len);
1156
1157 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
1158 m));
1159 }
1160
1161 case IP_TOS:
1162 case IP_TTL:
1163 case IP_RECVOPTS:
1164 case IP_RECVRETOPTS:
1165 case IP_RECVDSTADDR:
1166 case IP_RECVIF:
1167 case IP_FAITH:
1168 error = sooptcopyin(sopt, &optval, sizeof optval,
1169 sizeof optval);
1170 if (error)
1171 break;
1172
1173 switch (sopt->sopt_name) {
1174 case IP_TOS:
1175 inp->inp_ip_tos = optval;
1176 break;
1177
1178 case IP_TTL:
1179 inp->inp_ip_ttl = optval;
1180 break;
1181#define OPTSET(bit) \
1182 if (optval) \
1183 inp->inp_flags |= bit; \
1184 else \
1185 inp->inp_flags &= ~bit;
1186
1187 case IP_RECVOPTS:
1188 OPTSET(INP_RECVOPTS);
1189 break;
1190
1191 case IP_RECVRETOPTS:
1192 OPTSET(INP_RECVRETOPTS);
1193 break;
1194
1195 case IP_RECVDSTADDR:
1196 OPTSET(INP_RECVDSTADDR);
1197 break;
1198
1199 case IP_RECVIF:
1200 OPTSET(INP_RECVIF);
1201 break;
1202
1203 case IP_FAITH:
1204 OPTSET(INP_FAITH);
1205 break;
1206 }
1207 break;
1208#undef OPTSET
1209
1210 case IP_MULTICAST_IF:
1211 case IP_MULTICAST_VIF:
1212 case IP_MULTICAST_TTL:
1213 case IP_MULTICAST_LOOP:
1214 case IP_ADD_MEMBERSHIP:
1215 case IP_DROP_MEMBERSHIP:
1216 error = ip_setmoptions(sopt, &inp->inp_moptions);
1217 break;
1218
1219 case IP_PORTRANGE:
1220 error = sooptcopyin(sopt, &optval, sizeof optval,
1221 sizeof optval);
1222 if (error)
1223 break;
1224
1225 switch (optval) {
1226 case IP_PORTRANGE_DEFAULT:
1227 inp->inp_flags &= ~(INP_LOWPORT);
1228 inp->inp_flags &= ~(INP_HIGHPORT);
1229 break;
1230
1231 case IP_PORTRANGE_HIGH:
1232 inp->inp_flags &= ~(INP_LOWPORT);
1233 inp->inp_flags |= INP_HIGHPORT;
1234 break;
1235
1236 case IP_PORTRANGE_LOW:
1237 inp->inp_flags &= ~(INP_HIGHPORT);
1238 inp->inp_flags |= INP_LOWPORT;
1239 break;
1240
1241 default:
1242 error = EINVAL;
1243 break;
1244 }
1245 break;
1246
1247#ifdef IPSEC
1248 case IP_IPSEC_POLICY:
1249 {
1250 caddr_t req;
1251 size_t len = 0;
1252 int priv;
1253 struct mbuf *m;
1254 int optname;
1255
1256 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1257 break;
1258 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1259 break;
1260 priv = (sopt->sopt_td != NULL &&
1261 suser_td(sopt->sopt_td) != 0) ? 0 : 1;
1262 req = mtod(m, caddr_t);
1263 len = m->m_len;
1264 optname = sopt->sopt_name;
1265 error = ipsec4_set_policy(inp, optname, req, len, priv);
1266 m_freem(m);
1267 break;
1268 }
1269#endif /*IPSEC*/
1270
1271 default:
1272 error = ENOPROTOOPT;
1273 break;
1274 }
1275 break;
1276
1277 case SOPT_GET:
1278 switch (sopt->sopt_name) {
1279 case IP_OPTIONS:
1280 case IP_RETOPTS:
1281 if (inp->inp_options)
1282 error = sooptcopyout(sopt,
1283 mtod(inp->inp_options,
1284 char *),
1285 inp->inp_options->m_len);
1286 else
1287 sopt->sopt_valsize = 0;
1288 break;
1289
1290 case IP_TOS:
1291 case IP_TTL:
1292 case IP_RECVOPTS:
1293 case IP_RECVRETOPTS:
1294 case IP_RECVDSTADDR:
1295 case IP_RECVIF:
1296 case IP_PORTRANGE:
1297 case IP_FAITH:
1298 switch (sopt->sopt_name) {
1299
1300 case IP_TOS:
1301 optval = inp->inp_ip_tos;
1302 break;
1303
1304 case IP_TTL:
1305 optval = inp->inp_ip_ttl;
1306 break;
1307
1308#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1309
1310 case IP_RECVOPTS:
1311 optval = OPTBIT(INP_RECVOPTS);
1312 break;
1313
1314 case IP_RECVRETOPTS:
1315 optval = OPTBIT(INP_RECVRETOPTS);
1316 break;
1317
1318 case IP_RECVDSTADDR:
1319 optval = OPTBIT(INP_RECVDSTADDR);
1320 break;
1321
1322 case IP_RECVIF:
1323 optval = OPTBIT(INP_RECVIF);
1324 break;
1325
1326 case IP_PORTRANGE:
1327 if (inp->inp_flags & INP_HIGHPORT)
1328 optval = IP_PORTRANGE_HIGH;
1329 else if (inp->inp_flags & INP_LOWPORT)
1330 optval = IP_PORTRANGE_LOW;
1331 else
1332 optval = 0;
1333 break;
1334
1335 case IP_FAITH:
1336 optval = OPTBIT(INP_FAITH);
1337 break;
1338 }
1339 error = sooptcopyout(sopt, &optval, sizeof optval);
1340 break;
1341
1342 case IP_MULTICAST_IF:
1343 case IP_MULTICAST_VIF:
1344 case IP_MULTICAST_TTL:
1345 case IP_MULTICAST_LOOP:
1346 case IP_ADD_MEMBERSHIP:
1347 case IP_DROP_MEMBERSHIP:
1348 error = ip_getmoptions(sopt, inp->inp_moptions);
1349 break;
1350
1351#ifdef IPSEC
1352 case IP_IPSEC_POLICY:
1353 {
1354 struct mbuf *m = NULL;
1355 caddr_t req = NULL;
1356 size_t len = 0;
1357
1358 if (m != 0) {
1359 req = mtod(m, caddr_t);
1360 len = m->m_len;
1361 }
1362 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
1363 if (error == 0)
1364 error = soopt_mcopyout(sopt, m); /* XXX */
1365 if (error == 0)
1366 m_freem(m);
1367 break;
1368 }
1369#endif /*IPSEC*/
1370
1371 default:
1372 error = ENOPROTOOPT;
1373 break;
1374 }
1375 break;
1376 }
1377 return (error);
1378}
1379
1380/*
1381 * Set up IP options in pcb for insertion in output packets.
1382 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1383 * with destination address if source routed.
1384 */
1385static int
1386ip_pcbopts(optname, pcbopt, m)
1387 int optname;
1388 struct mbuf **pcbopt;
1389 register struct mbuf *m;
1390{
1391 register int cnt, optlen;
1392 register u_char *cp;
1393 u_char opt;
1394
1395 /* turn off any old options */
1396 if (*pcbopt)
1397 (void)m_free(*pcbopt);
1398 *pcbopt = 0;
1399 if (m == (struct mbuf *)0 || m->m_len == 0) {
1400 /*
1401 * Only turning off any previous options.
1402 */
1403 if (m)
1404 (void)m_free(m);
1405 return (0);
1406 }
1407
1408 if (m->m_len % sizeof(int32_t))
1409 goto bad;
1410 /*
1411 * IP first-hop destination address will be stored before
1412 * actual options; move other options back
1413 * and clear it when none present.
1414 */
1415 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1416 goto bad;
1417 cnt = m->m_len;
1418 m->m_len += sizeof(struct in_addr);
1419 cp = mtod(m, u_char *) + sizeof(struct in_addr);
1420 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
1421 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
1422
1423 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1424 opt = cp[IPOPT_OPTVAL];
1425 if (opt == IPOPT_EOL)
1426 break;
1427 if (opt == IPOPT_NOP)
1428 optlen = 1;
1429 else {
1430 if (cnt < IPOPT_OLEN + sizeof(*cp))
1431 goto bad;
1432 optlen = cp[IPOPT_OLEN];
1433 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1434 goto bad;
1435 }
1436 switch (opt) {
1437
1438 default:
1439 break;
1440
1441 case IPOPT_LSRR:
1442 case IPOPT_SSRR:
1443 /*
1444 * user process specifies route as:
1445 * ->A->B->C->D
1446 * D must be our final destination (but we can't
1447 * check that since we may not have connected yet).
1448 * A is first hop destination, which doesn't appear in
1449 * actual IP option, but is stored before the options.
1450 */
1451 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1452 goto bad;
1453 m->m_len -= sizeof(struct in_addr);
1454 cnt -= sizeof(struct in_addr);
1455 optlen -= sizeof(struct in_addr);
1456 cp[IPOPT_OLEN] = optlen;
1457 /*
1458 * Move first hop before start of options.
1459 */
1460 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1461 sizeof(struct in_addr));
1462 /*
1463 * Then copy rest of options back
1464 * to close up the deleted entry.
1465 */
1466 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
1467 sizeof(struct in_addr)),
1468 (caddr_t)&cp[IPOPT_OFFSET+1],
1469 (unsigned)cnt + sizeof(struct in_addr));
1470 break;
1471 }
1472 }
1473 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1474 goto bad;
1475 *pcbopt = m;
1476 return (0);
1477
1478bad:
1479 (void)m_free(m);
1480 return (EINVAL);
1481}
1482
1483/*
1484 * XXX
1485 * The whole multicast option thing needs to be re-thought.
1486 * Several of these options are equally applicable to non-multicast
1487 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
1488 * standard option (IP_TTL).
1489 */
1490
1491/*
1492 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
1493 */
1494static struct ifnet *
1495ip_multicast_if(a, ifindexp)
1496 struct in_addr *a;
1497 int *ifindexp;
1498{
1499 int ifindex;
1500 struct ifnet *ifp;
1501
1502 if (ifindexp)
1503 *ifindexp = 0;
1504 if (ntohl(a->s_addr) >> 24 == 0) {
1505 ifindex = ntohl(a->s_addr) & 0xffffff;
1506 if (ifindex < 0 || if_index < ifindex)
1507 return NULL;
1508 ifp = ifnet_byindex(ifindex);
1509 if (ifindexp)
1510 *ifindexp = ifindex;
1511 } else {
1512 INADDR_TO_IFP(*a, ifp);
1513 }
1514 return ifp;
1515}
1516
1517/*
1518 * Set the IP multicast options in response to user setsockopt().
1519 */
1520static int
1521ip_setmoptions(sopt, imop)
1522 struct sockopt *sopt;
1523 struct ip_moptions **imop;
1524{
1525 int error = 0;
1526 int i;
1527 struct in_addr addr;
1528 struct ip_mreq mreq;
1529 struct ifnet *ifp;
1530 struct ip_moptions *imo = *imop;
1531 struct route ro;
1532 struct sockaddr_in *dst;
1533 int ifindex;
1534 int s;
1535
1536 if (imo == NULL) {
1537 /*
1538 * No multicast option buffer attached to the pcb;
1539 * allocate one and initialize to default values.
1540 */
1541 imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
1542 M_WAITOK);
1543
1544 if (imo == NULL)
1545 return (ENOBUFS);
1546 *imop = imo;
1547 imo->imo_multicast_ifp = NULL;
1548 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1549 imo->imo_multicast_vif = -1;
1550 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1551 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1552 imo->imo_num_memberships = 0;
1553 }
1554
1555 switch (sopt->sopt_name) {
1556 /* store an index number for the vif you wanna use in the send */
1557 case IP_MULTICAST_VIF:
1558 if (legal_vif_num == 0) {
1559 error = EOPNOTSUPP;
1560 break;
1561 }
1562 error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
1563 if (error)
1564 break;
1565 if (!legal_vif_num(i) && (i != -1)) {
1566 error = EINVAL;
1567 break;
1568 }
1569 imo->imo_multicast_vif = i;
1570 break;
1571
1572 case IP_MULTICAST_IF:
1573 /*
1574 * Select the interface for outgoing multicast packets.
1575 */
1576 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
1577 if (error)
1578 break;
1579 /*
1580 * INADDR_ANY is used to remove a previous selection.
1581 * When no interface is selected, a default one is
1582 * chosen every time a multicast packet is sent.
1583 */
1584 if (addr.s_addr == INADDR_ANY) {
1585 imo->imo_multicast_ifp = NULL;
1586 break;
1587 }
1588 /*
1589 * The selected interface is identified by its local
1590 * IP address. Find the interface and confirm that
1591 * it supports multicasting.
1592 */
1593 s = splimp();
1594 ifp = ip_multicast_if(&addr, &ifindex);
1595 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1596 splx(s);
1597 error = EADDRNOTAVAIL;
1598 break;
1599 }
1600 imo->imo_multicast_ifp = ifp;
1601 if (ifindex)
1602 imo->imo_multicast_addr = addr;
1603 else
1604 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1605 splx(s);
1606 break;
1607
1608 case IP_MULTICAST_TTL:
1609 /*
1610 * Set the IP time-to-live for outgoing multicast packets.
1611 * The original multicast API required a char argument,
1612 * which is inconsistent with the rest of the socket API.
1613 * We allow either a char or an int.
1614 */
1615 if (sopt->sopt_valsize == 1) {
1616 u_char ttl;
1617 error = sooptcopyin(sopt, &ttl, 1, 1);
1618 if (error)
1619 break;
1620 imo->imo_multicast_ttl = ttl;
1621 } else {
1622 u_int ttl;
1623 error = sooptcopyin(sopt, &ttl, sizeof ttl,
1624 sizeof ttl);
1625 if (error)
1626 break;
1627 if (ttl > 255)
1628 error = EINVAL;
1629 else
1630 imo->imo_multicast_ttl = ttl;
1631 }
1632 break;
1633
1634 case IP_MULTICAST_LOOP:
1635 /*
1636 * Set the loopback flag for outgoing multicast packets.
1637 * Must be zero or one. The original multicast API required a
1638 * char argument, which is inconsistent with the rest
1639 * of the socket API. We allow either a char or an int.
1640 */
1641 if (sopt->sopt_valsize == 1) {
1642 u_char loop;
1643 error = sooptcopyin(sopt, &loop, 1, 1);
1644 if (error)
1645 break;
1646 imo->imo_multicast_loop = !!loop;
1647 } else {
1648 u_int loop;
1649 error = sooptcopyin(sopt, &loop, sizeof loop,
1650 sizeof loop);
1651 if (error)
1652 break;
1653 imo->imo_multicast_loop = !!loop;
1654 }
1655 break;
1656
1657 case IP_ADD_MEMBERSHIP:
1658 /*
1659 * Add a multicast group membership.
1660 * Group must be a valid IP multicast address.
1661 */
1662 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1663 if (error)
1664 break;
1665
1666 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1667 error = EINVAL;
1668 break;
1669 }
1670 s = splimp();
1671 /*
1672 * If no interface address was provided, use the interface of
1673 * the route to the given multicast address.
1674 */
1675 if (mreq.imr_interface.s_addr == INADDR_ANY) {
1676 bzero((caddr_t)&ro, sizeof(ro));
1677 dst = (struct sockaddr_in *)&ro.ro_dst;
1678 dst->sin_len = sizeof(*dst);
1679 dst->sin_family = AF_INET;
1680 dst->sin_addr = mreq.imr_multiaddr;
1681 rtalloc(&ro);
1682 if (ro.ro_rt == NULL) {
1683 error = EADDRNOTAVAIL;
1684 splx(s);
1685 break;
1686 }
1687 ifp = ro.ro_rt->rt_ifp;
1688 rtfree(ro.ro_rt);
1689 }
1690 else {
1691 ifp = ip_multicast_if(&mreq.imr_interface, NULL);
1692 }
1693
1694 /*
1695 * See if we found an interface, and confirm that it
1696 * supports multicast.
1697 */
1698 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1699 error = EADDRNOTAVAIL;
1700 splx(s);
1701 break;
1702 }
1703 /*
1704 * See if the membership already exists or if all the
1705 * membership slots are full.
1706 */
1707 for (i = 0; i < imo->imo_num_memberships; ++i) {
1708 if (imo->imo_membership[i]->inm_ifp == ifp &&
1709 imo->imo_membership[i]->inm_addr.s_addr
1710 == mreq.imr_multiaddr.s_addr)
1711 break;
1712 }
1713 if (i < imo->imo_num_memberships) {
1714 error = EADDRINUSE;
1715 splx(s);
1716 break;
1717 }
1718 if (i == IP_MAX_MEMBERSHIPS) {
1719 error = ETOOMANYREFS;
1720 splx(s);
1721 break;
1722 }
1723 /*
1724 * Everything looks good; add a new record to the multicast
1725 * address list for the given interface.
1726 */
1727 if ((imo->imo_membership[i] =
1728 in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
1729 error = ENOBUFS;
1730 splx(s);
1731 break;
1732 }
1733 ++imo->imo_num_memberships;
1734 splx(s);
1735 break;
1736
1737 case IP_DROP_MEMBERSHIP:
1738 /*
1739 * Drop a multicast group membership.
1740 * Group must be a valid IP multicast address.
1741 */
1742 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1743 if (error)
1744 break;
1745
1746 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1747 error = EINVAL;
1748 break;
1749 }
1750
1751 s = splimp();
1752 /*
1753 * If an interface address was specified, get a pointer
1754 * to its ifnet structure.
1755 */
1756 if (mreq.imr_interface.s_addr == INADDR_ANY)
1757 ifp = NULL;
1758 else {
1759 ifp = ip_multicast_if(&mreq.imr_interface, NULL);
1760 if (ifp == NULL) {
1761 error = EADDRNOTAVAIL;
1762 splx(s);
1763 break;
1764 }
1765 }
1766 /*
1767 * Find the membership in the membership array.
1768 */
1769 for (i = 0; i < imo->imo_num_memberships; ++i) {
1770 if ((ifp == NULL ||
1771 imo->imo_membership[i]->inm_ifp == ifp) &&
1772 imo->imo_membership[i]->inm_addr.s_addr ==
1773 mreq.imr_multiaddr.s_addr)
1774 break;
1775 }
1776 if (i == imo->imo_num_memberships) {
1777 error = EADDRNOTAVAIL;
1778 splx(s);
1779 break;
1780 }
1781 /*
1782 * Give up the multicast address record to which the
1783 * membership points.
1784 */
1785 in_delmulti(imo->imo_membership[i]);
1786 /*
1787 * Remove the gap in the membership array.
1788 */
1789 for (++i; i < imo->imo_num_memberships; ++i)
1790 imo->imo_membership[i-1] = imo->imo_membership[i];
1791 --imo->imo_num_memberships;
1792 splx(s);
1793 break;
1794
1795 default:
1796 error = EOPNOTSUPP;
1797 break;
1798 }
1799
1800 /*
1801 * If all options have default values, no need to keep the mbuf.
1802 */
1803 if (imo->imo_multicast_ifp == NULL &&
1804 imo->imo_multicast_vif == -1 &&
1805 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1806 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1807 imo->imo_num_memberships == 0) {
1808 free(*imop, M_IPMOPTS);
1809 *imop = NULL;
1810 }
1811
1812 return (error);
1813}
1814
1815/*
1816 * Return the IP multicast options in response to user getsockopt().
1817 */
1818static int
1819ip_getmoptions(sopt, imo)
1820 struct sockopt *sopt;
1821 register struct ip_moptions *imo;
1822{
1823 struct in_addr addr;
1824 struct in_ifaddr *ia;
1825 int error, optval;
1826 u_char coptval;
1827
1828 error = 0;
1829 switch (sopt->sopt_name) {
1830 case IP_MULTICAST_VIF:
1831 if (imo != NULL)
1832 optval = imo->imo_multicast_vif;
1833 else
1834 optval = -1;
1835 error = sooptcopyout(sopt, &optval, sizeof optval);
1836 break;
1837
1838 case IP_MULTICAST_IF:
1839 if (imo == NULL || imo->imo_multicast_ifp == NULL)
1840 addr.s_addr = INADDR_ANY;
1841 else if (imo->imo_multicast_addr.s_addr) {
1842 /* return the value user has set */
1843 addr = imo->imo_multicast_addr;
1844 } else {
1845 IFP_TO_IA(imo->imo_multicast_ifp, ia);
1846 addr.s_addr = (ia == NULL) ? INADDR_ANY
1847 : IA_SIN(ia)->sin_addr.s_addr;
1848 }
1849 error = sooptcopyout(sopt, &addr, sizeof addr);
1850 break;
1851
1852 case IP_MULTICAST_TTL:
1853 if (imo == 0)
1854 optval = coptval = IP_DEFAULT_MULTICAST_TTL;
1855 else
1856 optval = coptval = imo->imo_multicast_ttl;
1857 if (sopt->sopt_valsize == 1)
1858 error = sooptcopyout(sopt, &coptval, 1);
1859 else
1860 error = sooptcopyout(sopt, &optval, sizeof optval);
1861 break;
1862
1863 case IP_MULTICAST_LOOP:
1864 if (imo == 0)
1865 optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
1866 else
1867 optval = coptval = imo->imo_multicast_loop;
1868 if (sopt->sopt_valsize == 1)
1869 error = sooptcopyout(sopt, &coptval, 1);
1870 else
1871 error = sooptcopyout(sopt, &optval, sizeof optval);
1872 break;
1873
1874 default:
1875 error = ENOPROTOOPT;
1876 break;
1877 }
1878 return (error);
1879}
1880
1881/*
1882 * Discard the IP multicast options.
1883 */
1884void
1885ip_freemoptions(imo)
1886 register struct ip_moptions *imo;
1887{
1888 register int i;
1889
1890 if (imo != NULL) {
1891 for (i = 0; i < imo->imo_num_memberships; ++i)
1892 in_delmulti(imo->imo_membership[i]);
1893 free(imo, M_IPMOPTS);
1894 }
1895}
1896
1897/*
1898 * Routine called from ip_output() to loop back a copy of an IP multicast
1899 * packet to the input queue of a specified interface. Note that this
1900 * calls the output routine of the loopback "driver", but with an interface
1901 * pointer that might NOT be a loopback interface -- evil, but easier than
1902 * replicating that code here.
1903 */
1904static void
1905ip_mloopback(ifp, m, dst, hlen)
1906 struct ifnet *ifp;
1907 register struct mbuf *m;
1908 register struct sockaddr_in *dst;
1909 int hlen;
1910{
1911 register struct ip *ip;
1912 struct mbuf *copym;
1913
1914 copym = m_copy(m, 0, M_COPYALL);
1915 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
1916 copym = m_pullup(copym, hlen);
1917 if (copym != NULL) {
1918 /*
1919 * We don't bother to fragment if the IP length is greater
1920 * than the interface's MTU. Can this possibly matter?
1921 */
1922 ip = mtod(copym, struct ip *);
1923 HTONS(ip->ip_len);
1924 HTONS(ip->ip_off);
1925 ip->ip_sum = 0;
1926 if (ip->ip_vhl == IP_VHL_BORING) {
1927 ip->ip_sum = in_cksum_hdr(ip);
1928 } else {
1929 ip->ip_sum = in_cksum(copym, hlen);
1930 }
1931 /*
1932 * NB:
1933 * It's not clear whether there are any lingering
1934 * reentrancy problems in other areas which might
1935 * be exposed by using ip_input directly (in
1936 * particular, everything which modifies the packet
1937 * in-place). Yet another option is using the
1938 * protosw directly to deliver the looped back
1939 * packet. For the moment, we'll err on the side
1940 * of safety by using if_simloop().
1941 */
1942#if 1 /* XXX */
1943 if (dst->sin_family != AF_INET) {
1944 printf("ip_mloopback: bad address family %d\n",
1945 dst->sin_family);
1946 dst->sin_family = AF_INET;
1947 }
1948#endif
1949
1950#ifdef notdef
1951 copym->m_pkthdr.rcvif = ifp;
1952 ip_input(copym);
1953#else
1954 /* if the checksum hasn't been computed, mark it as valid */
1955 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1956 copym->m_pkthdr.csum_flags |=
1957 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1958 copym->m_pkthdr.csum_data = 0xffff;
1959 }
1960 if_simloop(ifp, copym, dst->sin_family, 0);
1961#endif
1962 }
1963}