Deleted Added
full compact
ip_output.c (57855) ip_output.c (58698)
1/*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
1/*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
34 * $FreeBSD: head/sys/netinet/ip_output.c 57855 2000-03-09 14:57:16Z shin $
34 * $FreeBSD: head/sys/netinet/ip_output.c 58698 2000-03-27 19:14:27Z jlemon $
35 */
36
37#define _IP_VHL
38
39#include "opt_ipfw.h"
40#include "opt_ipdn.h"
41#include "opt_ipdivert.h"
42#include "opt_ipfilter.h"
43#include "opt_ipsec.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/kernel.h>
48#include <sys/malloc.h>
49#include <sys/mbuf.h>
50#include <sys/protosw.h>
51#include <sys/socket.h>
52#include <sys/socketvar.h>
53#include <sys/proc.h>
54
55#include <net/if.h>
56#include <net/route.h>
57
58#include <netinet/in.h>
59#include <netinet/in_systm.h>
60#include <netinet/ip.h>
61#include <netinet/in_pcb.h>
62#include <netinet/in_var.h>
63#include <netinet/ip_var.h>
64
65#include "faith.h"
66
67#ifdef vax
68#include <machine/mtpr.h>
69#endif
70#include <machine/in_cksum.h>
71
72static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
73
74#ifdef IPSEC
75#include <netinet6/ipsec.h>
76#include <netkey/key.h>
77#ifdef IPSEC_DEBUG
78#include <netkey/key_debug.h>
79#else
80#define KEYDEBUG(lev,arg)
81#endif
82#endif /*IPSEC*/
83
84#include <netinet/ip_fw.h>
85
86#ifdef DUMMYNET
87#include <netinet/ip_dummynet.h>
88#endif
89
90#ifdef IPFIREWALL_FORWARD_DEBUG
/*
 * Debug helper (IPFIREWALL_FORWARD_DEBUG only): print an IPv4 address in
 * dotted-quad form.  `a' is any object with an s_addr member; the value is
 * passed through ntohl() before the four octets are extracted.
 *
 * The macro expands to a single printf() expression with no trailing
 * semicolon, so it is safe in if/else bodies and the caller supplies the
 * `;'.  Each octet is cast to unsigned to match the %u conversions
 * regardless of the width of ntohl()'s return type.
 */
#define print_ip(a) printf("%u.%u.%u.%u", \
	(unsigned)((ntohl((a).s_addr) >> 24) & 0xFF), \
	(unsigned)((ntohl((a).s_addr) >> 16) & 0xFF), \
	(unsigned)((ntohl((a).s_addr) >> 8) & 0xFF), \
	(unsigned)(ntohl((a).s_addr) & 0xFF))
95#endif
96
97u_short ip_id;
98
35 */
36
37#define _IP_VHL
38
39#include "opt_ipfw.h"
40#include "opt_ipdn.h"
41#include "opt_ipdivert.h"
42#include "opt_ipfilter.h"
43#include "opt_ipsec.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/kernel.h>
48#include <sys/malloc.h>
49#include <sys/mbuf.h>
50#include <sys/protosw.h>
51#include <sys/socket.h>
52#include <sys/socketvar.h>
53#include <sys/proc.h>
54
55#include <net/if.h>
56#include <net/route.h>
57
58#include <netinet/in.h>
59#include <netinet/in_systm.h>
60#include <netinet/ip.h>
61#include <netinet/in_pcb.h>
62#include <netinet/in_var.h>
63#include <netinet/ip_var.h>
64
65#include "faith.h"
66
67#ifdef vax
68#include <machine/mtpr.h>
69#endif
70#include <machine/in_cksum.h>
71
72static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
73
74#ifdef IPSEC
75#include <netinet6/ipsec.h>
76#include <netkey/key.h>
77#ifdef IPSEC_DEBUG
78#include <netkey/key_debug.h>
79#else
80#define KEYDEBUG(lev,arg)
81#endif
82#endif /*IPSEC*/
83
84#include <netinet/ip_fw.h>
85
86#ifdef DUMMYNET
87#include <netinet/ip_dummynet.h>
88#endif
89
90#ifdef IPFIREWALL_FORWARD_DEBUG
/*
 * Debug helper (IPFIREWALL_FORWARD_DEBUG only): print an IPv4 address in
 * dotted-quad form.  `a' is any object with an s_addr member; the value is
 * passed through ntohl() before the four octets are extracted.
 *
 * The macro expands to a single printf() expression with no trailing
 * semicolon, so it is safe in if/else bodies and the caller supplies the
 * `;'.  Each octet is cast to unsigned to match the %u conversions
 * regardless of the width of ntohl()'s return type.
 */
#define print_ip(a) printf("%u.%u.%u.%u", \
	(unsigned)((ntohl((a).s_addr) >> 24) & 0xFF), \
	(unsigned)((ntohl((a).s_addr) >> 16) & 0xFF), \
	(unsigned)((ntohl((a).s_addr) >> 8) & 0xFF), \
	(unsigned)(ntohl((a).s_addr) & 0xFF))
95#endif
96
97u_short ip_id;
98
/*
 * Forward declarations for helpers that are private to this file
 * (all declared static).
 */
99static void in_delayed_cksum(struct mbuf *m);
99static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
100static void ip_mloopback
101 __P((struct ifnet *, struct mbuf *, struct sockaddr_in *, int));
102static int ip_getmoptions
103 __P((struct sockopt *, struct ip_moptions *));
104static int ip_pcbopts __P((int, struct mbuf **, struct mbuf *));
105static int ip_setmoptions
106 __P((struct sockopt *, struct ip_moptions **));
107
/*
 * Declarations with external linkage.  fr_checkp is a function pointer
 * defined elsewhere; ip_output() calls through it only when it is non-NULL.
 */
108int ip_optcopy __P((struct ip *, struct ip *));
109extern int (*fr_checkp) __P((struct ip *, int, struct ifnet *, int, struct mbuf **));
110
111
112extern struct protosw inetsw[];
113
114/*
115 * IP output. The packet in mbuf chain m contains a skeletal IP
116 * header (with len, off, ttl, proto, tos, src, dst).
117 * The mbuf chain containing the packet will be freed.
118 * The mbuf opt, if present, will not be freed.
119 */
120int
121ip_output(m0, opt, ro, flags, imo)
122 struct mbuf *m0;
123 struct mbuf *opt;
124 struct route *ro;
125 int flags;
126 struct ip_moptions *imo;
127{
128 struct ip *ip, *mhip;
129 struct ifnet *ifp;
130 struct mbuf *m = m0;
131 int hlen = sizeof (struct ip);
132 int len, off, error = 0;
133 struct sockaddr_in *dst;
134 struct in_ifaddr *ia;
/*
 * Forward declarations for helpers that are private to this file
 * (all declared static).
 */
100static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
101static void ip_mloopback
102 __P((struct ifnet *, struct mbuf *, struct sockaddr_in *, int));
103static int ip_getmoptions
104 __P((struct sockopt *, struct ip_moptions *));
105static int ip_pcbopts __P((int, struct mbuf **, struct mbuf *));
106static int ip_setmoptions
107 __P((struct sockopt *, struct ip_moptions **));
108
/*
 * Declarations with external linkage.  fr_checkp is a function pointer
 * defined elsewhere; ip_output() calls through it only when it is non-NULL.
 */
109int ip_optcopy __P((struct ip *, struct ip *));
110extern int (*fr_checkp) __P((struct ip *, int, struct ifnet *, int, struct mbuf **));
111
112
113extern struct protosw inetsw[];
114
115/*
116 * IP output. The packet in mbuf chain m contains a skeletal IP
117 * header (with len, off, ttl, proto, tos, src, dst).
118 * The mbuf chain containing the packet will be freed.
119 * The mbuf opt, if present, will not be freed.
120 */
121int
122ip_output(m0, opt, ro, flags, imo)
123 struct mbuf *m0;
124 struct mbuf *opt;
125 struct route *ro;
126 int flags;
127 struct ip_moptions *imo;
128{
129 struct ip *ip, *mhip;
130 struct ifnet *ifp;
131 struct mbuf *m = m0;
132 int hlen = sizeof (struct ip);
133 int len, off, error = 0;
134 struct sockaddr_in *dst;
135 struct in_ifaddr *ia;
135 int isbroadcast;
136 int isbroadcast, sw_csum;
136#ifdef IPSEC
137 struct route iproute;
138 struct socket *so = NULL;
139 struct secpolicy *sp = NULL;
140#endif
141 u_int16_t divert_cookie; /* firewall cookie */
142#ifdef IPFIREWALL_FORWARD
143 int fwd_rewrite_src = 0;
144#endif
145 struct ip_fw_chain *rule = NULL;
146
147#ifdef IPDIVERT
148 /* Get and reset firewall cookie */
149 divert_cookie = ip_divert_cookie;
150 ip_divert_cookie = 0;
151#else
152 divert_cookie = 0;
153#endif
154
155 /*
156 * NOTE: If IP_SOCKINMRCVIF flag is set, 'socket *' is kept in
157 * m->m_pkthdr.rcvif for later IPSEC check. In this case,
158 * m->m_pkthdr.rcvif will be cleared to NULL after its contents are saved in
159 * 'so'.
160 * NULL clearance of rcvif should be natural because the packet should
161 * have been sent from my own socket and has no rcvif in this case.
162 * It is also necessary because someone might consider it as
163 * 'ifnet *', and cause SEGV.
164 */
165#if defined(IPFIREWALL) && defined(DUMMYNET)
166 /*
167 * dummynet packet are prepended a vestigial mbuf with
168 * m_type = MT_DUMMYNET and m_data pointing to the matching
169 * rule.
170 */
171 if (m->m_type == MT_DUMMYNET) {
172 /*
173 * the packet was already tagged, so part of the
174 * processing was already done, and we need to go down.
175 * Get parameters from the header.
176 */
177 rule = (struct ip_fw_chain *)(m->m_data) ;
178 opt = NULL ;
179 ro = & ( ((struct dn_pkt *)m)->ro ) ;
180 imo = NULL ;
181 dst = ((struct dn_pkt *)m)->dn_dst ;
182 ifp = ((struct dn_pkt *)m)->ifp ;
183 flags = ((struct dn_pkt *)m)->flags ;
184
185 m0 = m = m->m_next ;
186#ifdef IPSEC
187 if ((flags & IP_SOCKINMRCVIF) != 0) {
188 so = (struct socket *)m->m_pkthdr.rcvif;
189 m->m_pkthdr.rcvif = NULL;
190 }
191#endif
192 ip = mtod(m, struct ip *);
193 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
194 goto sendit;
195 } else
196 rule = NULL ;
197#endif
198#ifdef IPSEC
199 if ((flags & IP_SOCKINMRCVIF) != 0) {
200 so = (struct socket *)m->m_pkthdr.rcvif;
201 m->m_pkthdr.rcvif = NULL;
202 }
203#endif
204
205#ifdef DIAGNOSTIC
206 if ((m->m_flags & M_PKTHDR) == 0)
207 panic("ip_output no HDR");
208 if (!ro)
209 panic("ip_output no route, proto = %d",
210 mtod(m, struct ip *)->ip_p);
211#endif
212 if (opt) {
213 m = ip_insertoptions(m, opt, &len);
214 hlen = len;
215 }
216 ip = mtod(m, struct ip *);
217 /*
218 * Fill in IP header.
219 */
220 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
221 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
222 ip->ip_off &= IP_DF;
223 ip->ip_id = htons(ip_id++);
224 ipstat.ips_localout++;
225 } else {
226 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
227 }
228
229 dst = (struct sockaddr_in *)&ro->ro_dst;
230 /*
231 * If there is a cached route,
232 * check that it is to the same destination
233 * and is still up. If not, free it and try again.
234 */
235 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
236 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
237 RTFREE(ro->ro_rt);
238 ro->ro_rt = (struct rtentry *)0;
239 }
240 if (ro->ro_rt == 0) {
241 dst->sin_family = AF_INET;
242 dst->sin_len = sizeof(*dst);
243 dst->sin_addr = ip->ip_dst;
244 }
245 /*
246 * If routing to interface only,
247 * short circuit routing lookup.
248 */
249#define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
250#define sintosa(sin) ((struct sockaddr *)(sin))
251 if (flags & IP_ROUTETOIF) {
252 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
253 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
254 ipstat.ips_noroute++;
255 error = ENETUNREACH;
256 goto bad;
257 }
258 ifp = ia->ia_ifp;
259 ip->ip_ttl = 1;
260 isbroadcast = in_broadcast(dst->sin_addr, ifp);
261 } else {
262 /*
263 * If this is the case, we probably don't want to allocate
264 * a protocol-cloned route since we didn't get one from the
265 * ULP. This lets TCP do its thing, while not burdening
266 * forwarding or ICMP with the overhead of cloning a route.
267 * Of course, we still want to do any cloning requested by
268 * the link layer, as this is probably required in all cases
269 * for correct operation (as it is for ARP).
270 */
271 if (ro->ro_rt == 0)
272 rtalloc_ign(ro, RTF_PRCLONING);
273 if (ro->ro_rt == 0) {
274 ipstat.ips_noroute++;
275 error = EHOSTUNREACH;
276 goto bad;
277 }
278 ia = ifatoia(ro->ro_rt->rt_ifa);
279 ifp = ro->ro_rt->rt_ifp;
280 ro->ro_rt->rt_use++;
281 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
282 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
283 if (ro->ro_rt->rt_flags & RTF_HOST)
284 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
285 else
286 isbroadcast = in_broadcast(dst->sin_addr, ifp);
287 }
288 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
289 struct in_multi *inm;
290
291 m->m_flags |= M_MCAST;
292 /*
293 * IP destination address is multicast. Make sure "dst"
294 * still points to the address in "ro". (It may have been
295 * changed to point to a gateway address, above.)
296 */
297 dst = (struct sockaddr_in *)&ro->ro_dst;
298 /*
299 * See if the caller provided any multicast options
300 */
301 if (imo != NULL) {
302 ip->ip_ttl = imo->imo_multicast_ttl;
303 if (imo->imo_multicast_ifp != NULL)
304 ifp = imo->imo_multicast_ifp;
305 if (imo->imo_multicast_vif != -1)
306 ip->ip_src.s_addr =
307 ip_mcast_src(imo->imo_multicast_vif);
308 } else
309 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
310 /*
311 * Confirm that the outgoing interface supports multicast.
312 */
313 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
314 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
315 ipstat.ips_noroute++;
316 error = ENETUNREACH;
317 goto bad;
318 }
319 }
320 /*
321 * If source address not specified yet, use address
322 * of outgoing interface.
323 */
324 if (ip->ip_src.s_addr == INADDR_ANY) {
325 register struct in_ifaddr *ia1;
326
327 for (ia1 = in_ifaddrhead.tqh_first; ia1;
328 ia1 = ia1->ia_link.tqe_next)
329 if (ia1->ia_ifp == ifp) {
330 ip->ip_src = IA_SIN(ia1)->sin_addr;
331 break;
332 }
333 }
334
335 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
336 if (inm != NULL &&
337 (imo == NULL || imo->imo_multicast_loop)) {
338 /*
339 * If we belong to the destination multicast group
340 * on the outgoing interface, and the caller did not
341 * forbid loopback, loop back a copy.
342 */
343 ip_mloopback(ifp, m, dst, hlen);
344 }
345 else {
346 /*
347 * If we are acting as a multicast router, perform
348 * multicast forwarding as if the packet had just
349 * arrived on the interface to which we are about
350 * to send. The multicast forwarding function
351 * recursively calls this function, using the
352 * IP_FORWARDING flag to prevent infinite recursion.
353 *
354 * Multicasts that are looped back by ip_mloopback(),
355 * above, will be forwarded by the ip_input() routine,
356 * if necessary.
357 */
358 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
359 /*
360 * Check if rsvp daemon is running. If not, don't
361 * set ip_moptions. This ensures that the packet
362 * is multicast and not just sent down one link
363 * as prescribed by rsvpd.
364 */
365 if (!rsvp_on)
366 imo = NULL;
367 if (ip_mforward(ip, ifp, m, imo) != 0) {
368 m_freem(m);
369 goto done;
370 }
371 }
372 }
373
374 /*
375 * Multicasts with a time-to-live of zero may be looped-
376 * back, above, but must not be transmitted on a network.
377 * Also, multicasts addressed to the loopback interface
378 * are not sent -- the above call to ip_mloopback() will
379 * loop back a copy if this host actually belongs to the
380 * destination group on the loopback interface.
381 */
382 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
383 m_freem(m);
384 goto done;
385 }
386
387 goto sendit;
388 }
389#ifndef notdef
390 /*
391 * If source address not specified yet, use address
392 * of outgoing interface.
393 */
394 if (ip->ip_src.s_addr == INADDR_ANY) {
395 ip->ip_src = IA_SIN(ia)->sin_addr;
396#ifdef IPFIREWALL_FORWARD
397 /* Keep note that we did this - if the firewall changes
398 * the next-hop, our interface may change, changing the
399 * default source IP. It's a shame so much effort happens
400 * twice. Oh well.
401 */
402 fwd_rewrite_src++;
403#endif /* IPFIREWALL_FORWARD */
404 }
405#endif /* notdef */
406 /*
407 * Verify that we have any chance at all of being able to queue
408 * the packet or packet fragments
409 */
410 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
411 ifp->if_snd.ifq_maxlen) {
412 error = ENOBUFS;
413 goto bad;
414 }
415
416 /*
417 * Look for broadcast address and
418 * verify user is allowed to send
419 * such a packet.
420 */
421 if (isbroadcast) {
422 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
423 error = EADDRNOTAVAIL;
424 goto bad;
425 }
426 if ((flags & IP_ALLOWBROADCAST) == 0) {
427 error = EACCES;
428 goto bad;
429 }
430 /* don't allow broadcast messages to be fragmented */
431 if ((u_short)ip->ip_len > ifp->if_mtu) {
432 error = EMSGSIZE;
433 goto bad;
434 }
435 m->m_flags |= M_BCAST;
436 } else {
437 m->m_flags &= ~M_BCAST;
438 }
439
440sendit:
441 /*
442 * IpHack's section.
443 * - Xlate: translate packet's addr/port (NAT).
444 * - Firewall: deny/allow/etc.
445 * - Wrap: fake packet's addr/port <unimpl.>
446 * - Encapsulate: put it in another IP and send out. <unimp.>
447 */
448 if (fr_checkp) {
449 struct mbuf *m1 = m;
450
451 if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1)
452 goto done;
453 ip = mtod(m = m1, struct ip *);
454 }
455
456 /*
457 * Check with the firewall...
458 */
459 if (fw_enable && ip_fw_chk_ptr) {
460 struct sockaddr_in *old = dst;
461
462 off = (*ip_fw_chk_ptr)(&ip,
463 hlen, ifp, &divert_cookie, &m, &rule, &dst);
464 /*
465 * On return we must do the following:
466 * m == NULL -> drop the pkt
467 * 1<=off<= 0xffff -> DIVERT
468 * (off & 0x10000) -> send to a DUMMYNET pipe
469 * (off & 0x20000) -> TEE the packet
470 * dst != old -> IPFIREWALL_FORWARD
471 * off==0, dst==old -> accept
472 * If some of the above modules are not compiled in, then
473 * we shouldn't have to check the corresponding condition
474 * (because the ipfw control socket should not accept
475 * unsupported rules), but better play safe and drop
476 * packets in case of doubt.
477 */
478 if (!m) { /* firewall said to reject */
479 error = EACCES;
480 goto done;
481 }
482 if (off == 0 && dst == old) /* common case */
483 goto pass ;
484#ifdef DUMMYNET
485 if ((off & IP_FW_PORT_DYNT_FLAG) != 0) {
486 /*
487 * pass the pkt to dummynet. Need to include
488 * pipe number, m, ifp, ro, dst because these are
489 * not recomputed in the next pass.
490 * All other parameters have been already used and
491 * so they are not needed anymore.
492 * XXX note: if the ifp or ro entry are deleted
493 * while a pkt is in dummynet, we are in trouble!
494 */
495 dummynet_io(off & 0xffff, DN_TO_IP_OUT, m,ifp,ro,dst,rule,
496 flags);
497 goto done;
498 }
499#endif
500#ifdef IPDIVERT
501 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
502 struct mbuf *clone = NULL;
503
504 /* Clone packet if we're doing a 'tee' */
505 if ((off & IP_FW_PORT_TEE_FLAG) != 0)
506 clone = m_dup(m, M_DONTWAIT);
507
508 /* Restore packet header fields to original values */
509 HTONS(ip->ip_len);
510 HTONS(ip->ip_off);
511
512 /* Deliver packet to divert input routine */
513 ip_divert_cookie = divert_cookie;
514 divert_packet(m, 0, off & 0xffff);
515
516 /* If 'tee', continue with original packet */
517 if (clone != NULL) {
518 m = clone;
519 ip = mtod(m, struct ip *);
520 goto pass;
521 }
522 goto done;
523 }
524#endif
525
526#ifdef IPFIREWALL_FORWARD
527 /* Here we check dst to make sure it's directly reachable on the
528 * interface we previously thought it was.
529 * If it isn't (which may be likely in some situations) we have
530 * to re-route it (ie, find a route for the next-hop and the
531 * associated interface) and set them here. This is nested
532 * forwarding which in most cases is undesirable, except where
533 * such control is nigh impossible. So we do it here.
534 * And I'm babbling.
535 */
536 if (off == 0 && old != dst) {
537 struct in_ifaddr *ia;
538
539 /* It's changed... */
540 /* There must be a better way to do this next line... */
541 static struct route sro_fwd, *ro_fwd = &sro_fwd;
542#ifdef IPFIREWALL_FORWARD_DEBUG
543 printf("IPFIREWALL_FORWARD: New dst ip: ");
544 print_ip(dst->sin_addr);
545 printf("\n");
546#endif
547 /*
548 * We need to figure out if we have been forwarded
549 * to a local socket. If so then we should somehow
550 * "loop back" to ip_input, and get directed to the
551 * PCB as if we had received this packet. This is
552 * because it may be difficult to identify the packets
553 * you want to forward until they are being output
554 * and have selected an interface. (e.g. locally
555 * initiated packets) If we used the loopback interface,
556 * we would not be able to control what happens
557 * as the packet runs through ip_input() as
558 * it is done through an ISR.
559 */
560 for (ia = TAILQ_FIRST(&in_ifaddrhead); ia;
561 ia = TAILQ_NEXT(ia, ia_link)) {
562 /*
563 * If the addr to forward to is one
564 * of ours, we pretend to
565 * be the destination for this packet.
566 */
567 if (IA_SIN(ia)->sin_addr.s_addr ==
568 dst->sin_addr.s_addr)
569 break;
570 }
571 if (ia) {
572 /* tell ip_input "dont filter" */
573 ip_fw_fwd_addr = dst;
574 if (m->m_pkthdr.rcvif == NULL)
575 m->m_pkthdr.rcvif = ifunit("lo0");
576 ip->ip_len = htons((u_short)ip->ip_len);
577 ip->ip_off = htons((u_short)ip->ip_off);
578 ip->ip_sum = 0;
579 if (ip->ip_vhl == IP_VHL_BORING) {
580 ip->ip_sum = in_cksum_hdr(ip);
581 } else {
582 ip->ip_sum = in_cksum(m, hlen);
583 }
584 ip_input(m);
585 goto done;
586 }
587 /* Some of the logic for this was
588 * nicked from above.
589 *
590 * This rewrites the cached route in a local PCB.
591 * Is this what we want to do?
592 */
593 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
594
595 ro_fwd->ro_rt = 0;
596 rtalloc_ign(ro_fwd, RTF_PRCLONING);
597
598 if (ro_fwd->ro_rt == 0) {
599 ipstat.ips_noroute++;
600 error = EHOSTUNREACH;
601 goto bad;
602 }
603
604 ia = ifatoia(ro_fwd->ro_rt->rt_ifa);
605 ifp = ro_fwd->ro_rt->rt_ifp;
606 ro_fwd->ro_rt->rt_use++;
607 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
608 dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
609 if (ro_fwd->ro_rt->rt_flags & RTF_HOST)
610 isbroadcast =
611 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
612 else
613 isbroadcast = in_broadcast(dst->sin_addr, ifp);
614 RTFREE(ro->ro_rt);
615 ro->ro_rt = ro_fwd->ro_rt;
616 dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
617
618 /*
619 * If we added a default src ip earlier,
620 * which would have been gotten from the-then
621 * interface, do it again, from the new one.
622 */
623 if (fwd_rewrite_src)
624 ip->ip_src = IA_SIN(ia)->sin_addr;
625 goto pass ;
626 }
627#endif /* IPFIREWALL_FORWARD */
628 /*
629 * if we get here, none of the above matches, and
630 * we have to drop the pkt
631 */
632 m_freem(m);
633 error = EACCES; /* not sure this is the right error msg */
634 goto done;
635 }
636
637pass:
638#ifdef IPSEC
639 /* get SP for this packet */
640 if (so == NULL)
641 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
642 else
643 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
644
645 if (sp == NULL) {
646 ipsecstat.out_inval++;
647 goto bad;
648 }
649
650 error = 0;
651
652 /* check policy */
653 switch (sp->policy) {
654 case IPSEC_POLICY_DISCARD:
655 /*
656 * This packet is just discarded.
657 */
658 ipsecstat.out_polvio++;
659 goto bad;
660
661 case IPSEC_POLICY_BYPASS:
662 case IPSEC_POLICY_NONE:
663 /* no need to do IPsec. */
664 goto skip_ipsec;
665
666 case IPSEC_POLICY_IPSEC:
667 if (sp->req == NULL) {
668 /* XXX should be panic ? */
669 printf("ip_output: No IPsec request specified.\n");
670 error = EINVAL;
671 goto bad;
672 }
673 break;
674
675 case IPSEC_POLICY_ENTRUST:
676 default:
677 printf("ip_output: Invalid policy found. %d\n", sp->policy);
678 }
679
680 ip->ip_len = htons((u_short)ip->ip_len);
681 ip->ip_off = htons((u_short)ip->ip_off);
682 ip->ip_sum = 0;
683
684 {
685 struct ipsec_output_state state;
686 bzero(&state, sizeof(state));
687 state.m = m;
688 if (flags & IP_ROUTETOIF) {
689 state.ro = &iproute;
690 bzero(&iproute, sizeof(iproute));
691 } else
692 state.ro = ro;
693 state.dst = (struct sockaddr *)dst;
694
137#ifdef IPSEC
138 struct route iproute;
139 struct socket *so = NULL;
140 struct secpolicy *sp = NULL;
141#endif
142 u_int16_t divert_cookie; /* firewall cookie */
143#ifdef IPFIREWALL_FORWARD
144 int fwd_rewrite_src = 0;
145#endif
146 struct ip_fw_chain *rule = NULL;
147
148#ifdef IPDIVERT
149 /* Get and reset firewall cookie */
150 divert_cookie = ip_divert_cookie;
151 ip_divert_cookie = 0;
152#else
153 divert_cookie = 0;
154#endif
155
156 /*
157 * NOTE: If IP_SOCKINMRCVIF flag is set, 'socket *' is kept in
158 * m->m_pkthdr.rcvif for later IPSEC check. In this case,
159 * m->m_pkthdr.rcvif will be cleared to NULL after its contents are saved in
160 * 'so'.
161 * NULL clearance of rcvif should be natural because the packet should
162 * have been sent from my own socket and has no rcvif in this case.
163 * It is also necessary because someone might consider it as
164 * 'ifnet *', and cause SEGV.
165 */
166#if defined(IPFIREWALL) && defined(DUMMYNET)
167 /*
168 * dummynet packet are prepended a vestigial mbuf with
169 * m_type = MT_DUMMYNET and m_data pointing to the matching
170 * rule.
171 */
172 if (m->m_type == MT_DUMMYNET) {
173 /*
174 * the packet was already tagged, so part of the
175 * processing was already done, and we need to go down.
176 * Get parameters from the header.
177 */
178 rule = (struct ip_fw_chain *)(m->m_data) ;
179 opt = NULL ;
180 ro = & ( ((struct dn_pkt *)m)->ro ) ;
181 imo = NULL ;
182 dst = ((struct dn_pkt *)m)->dn_dst ;
183 ifp = ((struct dn_pkt *)m)->ifp ;
184 flags = ((struct dn_pkt *)m)->flags ;
185
186 m0 = m = m->m_next ;
187#ifdef IPSEC
188 if ((flags & IP_SOCKINMRCVIF) != 0) {
189 so = (struct socket *)m->m_pkthdr.rcvif;
190 m->m_pkthdr.rcvif = NULL;
191 }
192#endif
193 ip = mtod(m, struct ip *);
194 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
195 goto sendit;
196 } else
197 rule = NULL ;
198#endif
199#ifdef IPSEC
200 if ((flags & IP_SOCKINMRCVIF) != 0) {
201 so = (struct socket *)m->m_pkthdr.rcvif;
202 m->m_pkthdr.rcvif = NULL;
203 }
204#endif
205
206#ifdef DIAGNOSTIC
207 if ((m->m_flags & M_PKTHDR) == 0)
208 panic("ip_output no HDR");
209 if (!ro)
210 panic("ip_output no route, proto = %d",
211 mtod(m, struct ip *)->ip_p);
212#endif
213 if (opt) {
214 m = ip_insertoptions(m, opt, &len);
215 hlen = len;
216 }
217 ip = mtod(m, struct ip *);
218 /*
219 * Fill in IP header.
220 */
221 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
222 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
223 ip->ip_off &= IP_DF;
224 ip->ip_id = htons(ip_id++);
225 ipstat.ips_localout++;
226 } else {
227 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
228 }
229
230 dst = (struct sockaddr_in *)&ro->ro_dst;
231 /*
232 * If there is a cached route,
233 * check that it is to the same destination
234 * and is still up. If not, free it and try again.
235 */
236 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
237 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
238 RTFREE(ro->ro_rt);
239 ro->ro_rt = (struct rtentry *)0;
240 }
241 if (ro->ro_rt == 0) {
242 dst->sin_family = AF_INET;
243 dst->sin_len = sizeof(*dst);
244 dst->sin_addr = ip->ip_dst;
245 }
246 /*
247 * If routing to interface only,
248 * short circuit routing lookup.
249 */
250#define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
251#define sintosa(sin) ((struct sockaddr *)(sin))
252 if (flags & IP_ROUTETOIF) {
253 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
254 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
255 ipstat.ips_noroute++;
256 error = ENETUNREACH;
257 goto bad;
258 }
259 ifp = ia->ia_ifp;
260 ip->ip_ttl = 1;
261 isbroadcast = in_broadcast(dst->sin_addr, ifp);
262 } else {
263 /*
264 * If this is the case, we probably don't want to allocate
265 * a protocol-cloned route since we didn't get one from the
266 * ULP. This lets TCP do its thing, while not burdening
267 * forwarding or ICMP with the overhead of cloning a route.
268 * Of course, we still want to do any cloning requested by
269 * the link layer, as this is probably required in all cases
270 * for correct operation (as it is for ARP).
271 */
272 if (ro->ro_rt == 0)
273 rtalloc_ign(ro, RTF_PRCLONING);
274 if (ro->ro_rt == 0) {
275 ipstat.ips_noroute++;
276 error = EHOSTUNREACH;
277 goto bad;
278 }
279 ia = ifatoia(ro->ro_rt->rt_ifa);
280 ifp = ro->ro_rt->rt_ifp;
281 ro->ro_rt->rt_use++;
282 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
283 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
284 if (ro->ro_rt->rt_flags & RTF_HOST)
285 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
286 else
287 isbroadcast = in_broadcast(dst->sin_addr, ifp);
288 }
289 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
290 struct in_multi *inm;
291
292 m->m_flags |= M_MCAST;
293 /*
294 * IP destination address is multicast. Make sure "dst"
295 * still points to the address in "ro". (It may have been
296 * changed to point to a gateway address, above.)
297 */
298 dst = (struct sockaddr_in *)&ro->ro_dst;
299 /*
300 * See if the caller provided any multicast options
301 */
302 if (imo != NULL) {
303 ip->ip_ttl = imo->imo_multicast_ttl;
304 if (imo->imo_multicast_ifp != NULL)
305 ifp = imo->imo_multicast_ifp;
306 if (imo->imo_multicast_vif != -1)
307 ip->ip_src.s_addr =
308 ip_mcast_src(imo->imo_multicast_vif);
309 } else
310 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
311 /*
312 * Confirm that the outgoing interface supports multicast.
313 */
314 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
315 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
316 ipstat.ips_noroute++;
317 error = ENETUNREACH;
318 goto bad;
319 }
320 }
321 /*
322 * If source address not specified yet, use address
323 * of outgoing interface.
324 */
325 if (ip->ip_src.s_addr == INADDR_ANY) {
326 register struct in_ifaddr *ia1;
327
328 for (ia1 = in_ifaddrhead.tqh_first; ia1;
329 ia1 = ia1->ia_link.tqe_next)
330 if (ia1->ia_ifp == ifp) {
331 ip->ip_src = IA_SIN(ia1)->sin_addr;
332 break;
333 }
334 }
335
336 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
337 if (inm != NULL &&
338 (imo == NULL || imo->imo_multicast_loop)) {
339 /*
340 * If we belong to the destination multicast group
341 * on the outgoing interface, and the caller did not
342 * forbid loopback, loop back a copy.
343 */
344 ip_mloopback(ifp, m, dst, hlen);
345 }
346 else {
347 /*
348 * If we are acting as a multicast router, perform
349 * multicast forwarding as if the packet had just
350 * arrived on the interface to which we are about
351 * to send. The multicast forwarding function
352 * recursively calls this function, using the
353 * IP_FORWARDING flag to prevent infinite recursion.
354 *
355 * Multicasts that are looped back by ip_mloopback(),
356 * above, will be forwarded by the ip_input() routine,
357 * if necessary.
358 */
359 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
360 /*
361 * Check if rsvp daemon is running. If not, don't
362 * set ip_moptions. This ensures that the packet
363 * is multicast and not just sent down one link
364 * as prescribed by rsvpd.
365 */
366 if (!rsvp_on)
367 imo = NULL;
368 if (ip_mforward(ip, ifp, m, imo) != 0) {
369 m_freem(m);
370 goto done;
371 }
372 }
373 }
374
375 /*
376 * Multicasts with a time-to-live of zero may be looped-
377 * back, above, but must not be transmitted on a network.
378 * Also, multicasts addressed to the loopback interface
379 * are not sent -- the above call to ip_mloopback() will
380 * loop back a copy if this host actually belongs to the
381 * destination group on the loopback interface.
382 */
383 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
384 m_freem(m);
385 goto done;
386 }
387
388 goto sendit;
389 }
390#ifndef notdef
391 /*
392 * If source address not specified yet, use address
393 * of outgoing interface.
394 */
395 if (ip->ip_src.s_addr == INADDR_ANY) {
396 ip->ip_src = IA_SIN(ia)->sin_addr;
397#ifdef IPFIREWALL_FORWARD
398 /* Keep note that we did this - if the firewall changes
399 * the next-hop, our interface may change, changing the
400 * default source IP. It's a shame so much effort happens
401 * twice. Oh well.
402 */
403 fwd_rewrite_src++;
404#endif /* IPFIREWALL_FORWARD */
405 }
406#endif /* notdef */
407 /*
408 * Verify that we have any chance at all of being able to queue
409 * the packet or packet fragments
410 */
411 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
412 ifp->if_snd.ifq_maxlen) {
413 error = ENOBUFS;
414 goto bad;
415 }
416
417 /*
418 * Look for broadcast address
419 * and verify user is allowed to send
420 * such a packet.
421 */
422 if (isbroadcast) {
423 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
424 error = EADDRNOTAVAIL;
425 goto bad;
426 }
427 if ((flags & IP_ALLOWBROADCAST) == 0) {
428 error = EACCES;
429 goto bad;
430 }
431 /* don't allow broadcast messages to be fragmented */
432 if ((u_short)ip->ip_len > ifp->if_mtu) {
433 error = EMSGSIZE;
434 goto bad;
435 }
436 m->m_flags |= M_BCAST;
437 } else {
438 m->m_flags &= ~M_BCAST;
439 }
440
441sendit:
442 /*
443 * IpHack's section.
444 * - Xlate: translate packet's addr/port (NAT).
445 * - Firewall: deny/allow/etc.
446 * - Wrap: fake packet's addr/port <unimpl.>
447 * - Encapsulate: put it in another IP and send out. <unimp.>
448 */
449 if (fr_checkp) {
450 struct mbuf *m1 = m;
451
452 if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1)
453 goto done;
454 ip = mtod(m = m1, struct ip *);
455 }
456
457 /*
458 * Check with the firewall...
459 */
460 if (fw_enable && ip_fw_chk_ptr) {
461 struct sockaddr_in *old = dst;
462
463 off = (*ip_fw_chk_ptr)(&ip,
464 hlen, ifp, &divert_cookie, &m, &rule, &dst);
465 /*
466 * On return we must do the following:
467 * m == NULL -> drop the pkt
468 * 1<=off<= 0xffff -> DIVERT
469 * (off & 0x10000) -> send to a DUMMYNET pipe
470 * (off & 0x20000) -> TEE the packet
471 * dst != old -> IPFIREWALL_FORWARD
472 * off==0, dst==old -> accept
473 * If some of the above modules are not compiled in, then
474 * we shouldn't have to check the corresponding condition
475 * (because the ipfw control socket should not accept
476 * unsupported rules), but better play safe and drop
477 * packets in case of doubt.
478 */
479 if (!m) { /* firewall said to reject */
480 error = EACCES;
481 goto done;
482 }
483 if (off == 0 && dst == old) /* common case */
484 goto pass ;
485#ifdef DUMMYNET
486 if ((off & IP_FW_PORT_DYNT_FLAG) != 0) {
487 /*
488 * pass the pkt to dummynet. Need to include
489 * pipe number, m, ifp, ro, dst because these are
490 * not recomputed in the next pass.
491 * All other parameters have been already used and
492 * so they are not needed anymore.
493 * XXX note: if the ifp or ro entry are deleted
494 * while a pkt is in dummynet, we are in trouble!
495 */
496 dummynet_io(off & 0xffff, DN_TO_IP_OUT, m,ifp,ro,dst,rule,
497 flags);
498 goto done;
499 }
500#endif
501#ifdef IPDIVERT
502 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
503 struct mbuf *clone = NULL;
504
505 /* Clone packet if we're doing a 'tee' */
506 if ((off & IP_FW_PORT_TEE_FLAG) != 0)
507 clone = m_dup(m, M_DONTWAIT);
508
509 /* Restore packet header fields to original values */
510 HTONS(ip->ip_len);
511 HTONS(ip->ip_off);
512
513 /* Deliver packet to divert input routine */
514 ip_divert_cookie = divert_cookie;
515 divert_packet(m, 0, off & 0xffff);
516
517 /* If 'tee', continue with original packet */
518 if (clone != NULL) {
519 m = clone;
520 ip = mtod(m, struct ip *);
521 goto pass;
522 }
523 goto done;
524 }
525#endif
526
527#ifdef IPFIREWALL_FORWARD
528 /* Here we check dst to make sure it's directly reachable on the
529 * interface we previously thought it was.
530 * If it isn't (which may be likely in some situations) we have
531 * to re-route it (ie, find a route for the next-hop and the
532 * associated interface) and set them here. This is nested
533 * forwarding which in most cases is undesirable, except where
534 * such control is nigh impossible. So we do it here.
535 * And I'm babbling.
536 */
537 if (off == 0 && old != dst) {
538 struct in_ifaddr *ia;
539
540 /* It's changed... */
541 /* There must be a better way to do this next line... */
542 static struct route sro_fwd, *ro_fwd = &sro_fwd;
543#ifdef IPFIREWALL_FORWARD_DEBUG
544 printf("IPFIREWALL_FORWARD: New dst ip: ");
545 print_ip(dst->sin_addr);
546 printf("\n");
547#endif
548 /*
549 * We need to figure out if we have been forwarded
550 * to a local socket. If so then we should somehow
551 * "loop back" to ip_input, and get directed to the
552 * PCB as if we had received this packet. This is
553 * because it may be difficult to identify the packets
554 * you want to forward until they are being output
555 * and have selected an interface. (e.g. locally
556 * initiated packets) If we used the loopback interface,
557 * we would not be able to control what happens
558 * as the packet runs through ip_input() as
559 * it is done through a ISR.
560 */
561 for (ia = TAILQ_FIRST(&in_ifaddrhead); ia;
562 ia = TAILQ_NEXT(ia, ia_link)) {
563 /*
564 * If the addr to forward to is one
565 * of ours, we pretend to
566 * be the destination for this packet.
567 */
568 if (IA_SIN(ia)->sin_addr.s_addr ==
569 dst->sin_addr.s_addr)
570 break;
571 }
572 if (ia) {
573 /* tell ip_input "dont filter" */
574 ip_fw_fwd_addr = dst;
575 if (m->m_pkthdr.rcvif == NULL)
576 m->m_pkthdr.rcvif = ifunit("lo0");
577 ip->ip_len = htons((u_short)ip->ip_len);
578 ip->ip_off = htons((u_short)ip->ip_off);
579 ip->ip_sum = 0;
580 if (ip->ip_vhl == IP_VHL_BORING) {
581 ip->ip_sum = in_cksum_hdr(ip);
582 } else {
583 ip->ip_sum = in_cksum(m, hlen);
584 }
585 ip_input(m);
586 goto done;
587 }
588 /* Some of the logic for this was
589 * nicked from above.
590 *
591 * This rewrites the cached route in a local PCB.
592 * Is this what we want to do?
593 */
594 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
595
596 ro_fwd->ro_rt = 0;
597 rtalloc_ign(ro_fwd, RTF_PRCLONING);
598
599 if (ro_fwd->ro_rt == 0) {
600 ipstat.ips_noroute++;
601 error = EHOSTUNREACH;
602 goto bad;
603 }
604
605 ia = ifatoia(ro_fwd->ro_rt->rt_ifa);
606 ifp = ro_fwd->ro_rt->rt_ifp;
607 ro_fwd->ro_rt->rt_use++;
608 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
609 dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
610 if (ro_fwd->ro_rt->rt_flags & RTF_HOST)
611 isbroadcast =
612 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
613 else
614 isbroadcast = in_broadcast(dst->sin_addr, ifp);
615 RTFREE(ro->ro_rt);
616 ro->ro_rt = ro_fwd->ro_rt;
617 dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
618
619 /*
620 * If we added a default src ip earlier,
621 * which would have been gotten from the-then
622 * interface, do it again, from the new one.
623 */
624 if (fwd_rewrite_src)
625 ip->ip_src = IA_SIN(ia)->sin_addr;
626 goto pass ;
627 }
628#endif /* IPFIREWALL_FORWARD */
629 /*
630 * if we get here, none of the above matches, and
631 * we have to drop the pkt
632 */
633 m_freem(m);
634 error = EACCES; /* not sure this is the right error msg */
635 goto done;
636 }
637
638pass:
639#ifdef IPSEC
640 /* get SP for this packet */
641 if (so == NULL)
642 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
643 else
644 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
645
646 if (sp == NULL) {
647 ipsecstat.out_inval++;
648 goto bad;
649 }
650
651 error = 0;
652
653 /* check policy */
654 switch (sp->policy) {
655 case IPSEC_POLICY_DISCARD:
656 /*
657 * This packet is just discarded.
658 */
659 ipsecstat.out_polvio++;
660 goto bad;
661
662 case IPSEC_POLICY_BYPASS:
663 case IPSEC_POLICY_NONE:
664 /* no need to do IPsec. */
665 goto skip_ipsec;
666
667 case IPSEC_POLICY_IPSEC:
668 if (sp->req == NULL) {
669 /* XXX should be panic ? */
670 printf("ip_output: No IPsec request specified.\n");
671 error = EINVAL;
672 goto bad;
673 }
674 break;
675
676 case IPSEC_POLICY_ENTRUST:
677 default:
678 printf("ip_output: Invalid policy found. %d\n", sp->policy);
679 }
680
681 ip->ip_len = htons((u_short)ip->ip_len);
682 ip->ip_off = htons((u_short)ip->ip_off);
683 ip->ip_sum = 0;
684
685 {
686 struct ipsec_output_state state;
687 bzero(&state, sizeof(state));
688 state.m = m;
689 if (flags & IP_ROUTETOIF) {
690 state.ro = &iproute;
691 bzero(&iproute, sizeof(iproute));
692 } else
693 state.ro = ro;
694 state.dst = (struct sockaddr *)dst;
695
696 /*
697 * XXX
698 * delayed checksums are not currently compatible with IPsec
699 */
700 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
701 in_delayed_cksum(m);
702 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
703 }
704
695 error = ipsec4_output(&state, sp, flags);
696
697 m = state.m;
698 if (flags & IP_ROUTETOIF) {
699 /*
700 * if we have tunnel mode SA, we may need to ignore
701 * IP_ROUTETOIF.
702 */
703 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
704 flags &= ~IP_ROUTETOIF;
705 ro = state.ro;
706 }
707 } else
708 ro = state.ro;
709 dst = (struct sockaddr_in *)state.dst;
710 if (error) {
711 /* mbuf is already reclaimed in ipsec4_output. */
712 m0 = NULL;
713 switch (error) {
714 case EHOSTUNREACH:
715 case ENETUNREACH:
716 case EMSGSIZE:
717 case ENOBUFS:
718 case ENOMEM:
719 break;
720 default:
721 printf("ip4_output (ipsec): error code %d\n", error);
722 /*fall through*/
723 case ENOENT:
724 /* don't show these error codes to the user */
725 error = 0;
726 break;
727 }
728 goto bad;
729 }
730 }
731
732 /* be sure to update variables that are affected by ipsec4_output() */
733 ip = mtod(m, struct ip *);
734#ifdef _IP_VHL
735 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
736#else
737 hlen = ip->ip_hl << 2;
738#endif
739 if (ro->ro_rt == NULL) {
740 if ((flags & IP_ROUTETOIF) == 0) {
741 printf("ip_output: "
742 "can't update route after IPsec processing\n");
743 error = EHOSTUNREACH; /*XXX*/
744 goto bad;
745 }
746 } else {
747 /* nobody uses ia beyond here */
748 ifp = ro->ro_rt->rt_ifp;
749 }
750
751 /* make it flipped, again. */
752 ip->ip_len = ntohs((u_short)ip->ip_len);
753 ip->ip_off = ntohs((u_short)ip->ip_off);
754skip_ipsec:
755#endif /*IPSEC*/
756
705 error = ipsec4_output(&state, sp, flags);
706
707 m = state.m;
708 if (flags & IP_ROUTETOIF) {
709 /*
710 * if we have tunnel mode SA, we may need to ignore
711 * IP_ROUTETOIF.
712 */
713 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
714 flags &= ~IP_ROUTETOIF;
715 ro = state.ro;
716 }
717 } else
718 ro = state.ro;
719 dst = (struct sockaddr_in *)state.dst;
720 if (error) {
721 /* mbuf is already reclaimed in ipsec4_output. */
722 m0 = NULL;
723 switch (error) {
724 case EHOSTUNREACH:
725 case ENETUNREACH:
726 case EMSGSIZE:
727 case ENOBUFS:
728 case ENOMEM:
729 break;
730 default:
731 printf("ip4_output (ipsec): error code %d\n", error);
732 /*fall through*/
733 case ENOENT:
734 /* don't show these error codes to the user */
735 error = 0;
736 break;
737 }
738 goto bad;
739 }
740 }
741
742 /* be sure to update variables that are affected by ipsec4_output() */
743 ip = mtod(m, struct ip *);
744#ifdef _IP_VHL
745 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
746#else
747 hlen = ip->ip_hl << 2;
748#endif
749 if (ro->ro_rt == NULL) {
750 if ((flags & IP_ROUTETOIF) == 0) {
751 printf("ip_output: "
752 "can't update route after IPsec processing\n");
753 error = EHOSTUNREACH; /*XXX*/
754 goto bad;
755 }
756 } else {
757 /* nobody uses ia beyond here */
758 ifp = ro->ro_rt->rt_ifp;
759 }
760
761 /* make it flipped, again. */
762 ip->ip_len = ntohs((u_short)ip->ip_len);
763 ip->ip_off = ntohs((u_short)ip->ip_off);
764skip_ipsec:
765#endif /*IPSEC*/
766
767 sw_csum = m->m_pkthdr.csum_flags | CSUM_IP;
768 m->m_pkthdr.csum_flags = sw_csum & ifp->if_hwassist;
769 sw_csum &= ~ifp->if_hwassist;
770 if (sw_csum & CSUM_DELAY_DATA) {
771 in_delayed_cksum(m);
772 sw_csum &= ~CSUM_DELAY_DATA;
773 }
774
757 /*
775 /*
758 * If small enough for interface, can just send directly.
776 * If small enough for interface, or the interface will take
777 * care of the fragmentation for us, can just send directly.
759 */
778 */
760 if ((u_short)ip->ip_len <= ifp->if_mtu) {
779 if ((u_short)ip->ip_len <= ifp->if_mtu ||
780 ifp->if_hwassist & CSUM_FRAGMENT) {
761 ip->ip_len = htons((u_short)ip->ip_len);
762 ip->ip_off = htons((u_short)ip->ip_off);
763 ip->ip_sum = 0;
781 ip->ip_len = htons((u_short)ip->ip_len);
782 ip->ip_off = htons((u_short)ip->ip_off);
783 ip->ip_sum = 0;
764 if (ip->ip_vhl == IP_VHL_BORING) {
765 ip->ip_sum = in_cksum_hdr(ip);
766 } else {
767 ip->ip_sum = in_cksum(m, hlen);
784 if (sw_csum & CSUM_DELAY_IP) {
785 if (ip->ip_vhl == IP_VHL_BORING) {
786 ip->ip_sum = in_cksum_hdr(ip);
787 } else {
788 ip->ip_sum = in_cksum(m, hlen);
789 }
768 }
769 error = (*ifp->if_output)(ifp, m,
770 (struct sockaddr *)dst, ro->ro_rt);
771 goto done;
772 }
773 /*
774 * Too large for interface; fragment if possible.
775 * Must be able to put at least 8 bytes per fragment.
776 */
777 if (ip->ip_off & IP_DF) {
778 error = EMSGSIZE;
779 /*
780 * This case can happen if the user changed the MTU
781 * of an interface after enabling IP on it. Because
782 * most netifs don't keep track of routes pointing to
783 * them, there is no way for one to update all its
784 * routes when the MTU is changed.
785 */
786 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
787 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
788 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
789 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
790 }
791 ipstat.ips_cantfrag++;
792 goto bad;
793 }
794 len = (ifp->if_mtu - hlen) &~ 7;
795 if (len < 8) {
796 error = EMSGSIZE;
797 goto bad;
798 }
799
790 }
791 error = (*ifp->if_output)(ifp, m,
792 (struct sockaddr *)dst, ro->ro_rt);
793 goto done;
794 }
795 /*
796 * Too large for interface; fragment if possible.
797 * Must be able to put at least 8 bytes per fragment.
798 */
799 if (ip->ip_off & IP_DF) {
800 error = EMSGSIZE;
801 /*
802 * This case can happen if the user changed the MTU
803 * of an interface after enabling IP on it. Because
804 * most netifs don't keep track of routes pointing to
805 * them, there is no way for one to update all its
806 * routes when the MTU is changed.
807 */
808 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
809 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
810 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
811 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
812 }
813 ipstat.ips_cantfrag++;
814 goto bad;
815 }
816 len = (ifp->if_mtu - hlen) &~ 7;
817 if (len < 8) {
818 error = EMSGSIZE;
819 goto bad;
820 }
821
822 /*
823 * if the interface will not calculate checksums on
824 * fragmented packets, then do it here.
825 */
826 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
827 (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) {
828 in_delayed_cksum(m);
829 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
830 }
831
800 {
801 int mhlen, firstlen = len;
802 struct mbuf **mnext = &m->m_nextpkt;
832 {
833 int mhlen, firstlen = len;
834 struct mbuf **mnext = &m->m_nextpkt;
835 int nfrags = 1;
803
804 /*
805 * Loop through length of segment after first fragment,
806 * make new header and copy data of each part and link onto chain.
807 */
808 m0 = m;
809 mhlen = sizeof (struct ip);
810 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
811 MGETHDR(m, M_DONTWAIT, MT_HEADER);
812 if (m == 0) {
813 error = ENOBUFS;
814 ipstat.ips_odropped++;
815 goto sendorfree;
816 }
836
837 /*
838 * Loop through length of segment after first fragment,
839 * make new header and copy data of each part and link onto chain.
840 */
841 m0 = m;
842 mhlen = sizeof (struct ip);
843 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
844 MGETHDR(m, M_DONTWAIT, MT_HEADER);
845 if (m == 0) {
846 error = ENOBUFS;
847 ipstat.ips_odropped++;
848 goto sendorfree;
849 }
817 m->m_flags |= (m0->m_flags & M_MCAST);
850 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
818 m->m_data += max_linkhdr;
819 mhip = mtod(m, struct ip *);
820 *mhip = *ip;
821 if (hlen > sizeof (struct ip)) {
822 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
823 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
824 }
825 m->m_len = mhlen;
826 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
827 if (ip->ip_off & IP_MF)
828 mhip->ip_off |= IP_MF;
829 if (off + len >= (u_short)ip->ip_len)
830 len = (u_short)ip->ip_len - off;
831 else
832 mhip->ip_off |= IP_MF;
833 mhip->ip_len = htons((u_short)(len + mhlen));
834 m->m_next = m_copy(m0, off, len);
835 if (m->m_next == 0) {
836 (void) m_free(m);
837 error = ENOBUFS; /* ??? */
838 ipstat.ips_odropped++;
839 goto sendorfree;
840 }
841 m->m_pkthdr.len = mhlen + len;
842 m->m_pkthdr.rcvif = (struct ifnet *)0;
851 m->m_data += max_linkhdr;
852 mhip = mtod(m, struct ip *);
853 *mhip = *ip;
854 if (hlen > sizeof (struct ip)) {
855 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
856 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
857 }
858 m->m_len = mhlen;
859 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
860 if (ip->ip_off & IP_MF)
861 mhip->ip_off |= IP_MF;
862 if (off + len >= (u_short)ip->ip_len)
863 len = (u_short)ip->ip_len - off;
864 else
865 mhip->ip_off |= IP_MF;
866 mhip->ip_len = htons((u_short)(len + mhlen));
867 m->m_next = m_copy(m0, off, len);
868 if (m->m_next == 0) {
869 (void) m_free(m);
870 error = ENOBUFS; /* ??? */
871 ipstat.ips_odropped++;
872 goto sendorfree;
873 }
874 m->m_pkthdr.len = mhlen + len;
875 m->m_pkthdr.rcvif = (struct ifnet *)0;
876 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
843 mhip->ip_off = htons((u_short)mhip->ip_off);
844 mhip->ip_sum = 0;
877 mhip->ip_off = htons((u_short)mhip->ip_off);
878 mhip->ip_sum = 0;
845 if (mhip->ip_vhl == IP_VHL_BORING) {
846 mhip->ip_sum = in_cksum_hdr(mhip);
847 } else {
848 mhip->ip_sum = in_cksum(m, mhlen);
879 if (sw_csum & CSUM_DELAY_IP) {
880 if (mhip->ip_vhl == IP_VHL_BORING) {
881 mhip->ip_sum = in_cksum_hdr(mhip);
882 } else {
883 mhip->ip_sum = in_cksum(m, mhlen);
884 }
849 }
850 *mnext = m;
851 mnext = &m->m_nextpkt;
885 }
886 *mnext = m;
887 mnext = &m->m_nextpkt;
852 ipstat.ips_ofragments++;
888 nfrags++;
853 }
889 }
890 ipstat.ips_ofragments += nfrags;
891
892 /* set first/last markers for fragment chain */
893 m->m_flags |= M_LASTFRAG;
894 m0->m_flags |= M_FIRSTFRAG | M_FRAG;
895 m0->m_pkthdr.csum_data = nfrags;
896
854 /*
855 * Update first fragment by trimming what's been copied out
856 * and updating header, then send each fragment (in order).
857 */
858 m = m0;
859 m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
860 m->m_pkthdr.len = hlen + firstlen;
861 ip->ip_len = htons((u_short)m->m_pkthdr.len);
862 ip->ip_off = htons((u_short)(ip->ip_off | IP_MF));
863 ip->ip_sum = 0;
897 /*
898 * Update first fragment by trimming what's been copied out
899 * and updating header, then send each fragment (in order).
900 */
901 m = m0;
902 m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
903 m->m_pkthdr.len = hlen + firstlen;
904 ip->ip_len = htons((u_short)m->m_pkthdr.len);
905 ip->ip_off = htons((u_short)(ip->ip_off | IP_MF));
906 ip->ip_sum = 0;
864 if (ip->ip_vhl == IP_VHL_BORING) {
865 ip->ip_sum = in_cksum_hdr(ip);
866 } else {
867 ip->ip_sum = in_cksum(m, hlen);
907 if (sw_csum & CSUM_DELAY_IP) {
908 if (ip->ip_vhl == IP_VHL_BORING) {
909 ip->ip_sum = in_cksum_hdr(ip);
910 } else {
911 ip->ip_sum = in_cksum(m, hlen);
912 }
868 }
869sendorfree:
870 for (m = m0; m; m = m0) {
871 m0 = m->m_nextpkt;
872 m->m_nextpkt = 0;
873 if (error == 0)
874 error = (*ifp->if_output)(ifp, m,
875 (struct sockaddr *)dst, ro->ro_rt);
876 else
877 m_freem(m);
878 }
879
880 if (error == 0)
881 ipstat.ips_fragmented++;
882 }
883done:
884#ifdef IPSEC
885 if (ro == &iproute && ro->ro_rt) {
886 RTFREE(ro->ro_rt);
887 ro->ro_rt = NULL;
888 }
889 if (sp != NULL) {
890 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
891 printf("DP ip_output call free SP:%p\n", sp));
892 key_freesp(sp);
893 }
894#endif /* IPSEC */
895 return (error);
896bad:
897 m_freem(m0);
898 goto done;
899}
900
913 }
914sendorfree:
915 for (m = m0; m; m = m0) {
916 m0 = m->m_nextpkt;
917 m->m_nextpkt = 0;
918 if (error == 0)
919 error = (*ifp->if_output)(ifp, m,
920 (struct sockaddr *)dst, ro->ro_rt);
921 else
922 m_freem(m);
923 }
924
925 if (error == 0)
926 ipstat.ips_fragmented++;
927 }
928done:
929#ifdef IPSEC
930 if (ro == &iproute && ro->ro_rt) {
931 RTFREE(ro->ro_rt);
932 ro->ro_rt = NULL;
933 }
934 if (sp != NULL) {
935 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
936 printf("DP ip_output call free SP:%p\n", sp));
937 key_freesp(sp);
938 }
939#endif /* IPSEC */
940 return (error);
941bad:
942 m_freem(m0);
943 goto done;
944}
945
946static void
947in_delayed_cksum(struct mbuf *m)
948{
949 struct ip *ip;
950 u_short csum, offset;
951
952 ip = mtod(m, struct ip *);
953 offset = IP_VHL_HL(ip->ip_vhl) << 2 ;
954 csum = in_cksum_skip(m, ip->ip_len, offset);
955 offset += m->m_pkthdr.csum_data; /* checksum offset */
956
957 if (offset + sizeof(u_short) > m->m_len) {
958 printf("delayed m_pullup, m->len: %d off: %d p: %d\n",
959 m->m_len, offset, ip->ip_p);
960 /*
961 * XXX
962 * this shouldn't happen, but if it does, the
963 * correct behavior may be to insert the checksum
964 * in the existing chain instead of rearranging it.
965 */
966 m = m_pullup(m, offset + sizeof(u_short));
967 }
968 *(u_short *)(m->m_data + offset) = csum;
969}
970
901/*
902 * Insert IP options into preformed packet.
903 * Adjust IP destination as required for IP source routing,
904 * as indicated by a non-zero in_addr at the start of the options.
905 *
906 * XXX This routine assumes that the packet has no options in place.
907 */
908static struct mbuf *
909ip_insertoptions(m, opt, phlen)
910 register struct mbuf *m;
911 struct mbuf *opt;
912 int *phlen;
913{
914 register struct ipoption *p = mtod(opt, struct ipoption *);
915 struct mbuf *n;
916 register struct ip *ip = mtod(m, struct ip *);
917 unsigned optlen;
918
919 optlen = opt->m_len - sizeof(p->ipopt_dst);
920 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
921 return (m); /* XXX should fail */
922 if (p->ipopt_dst.s_addr)
923 ip->ip_dst = p->ipopt_dst;
924 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
925 MGETHDR(n, M_DONTWAIT, MT_HEADER);
926 if (n == 0)
927 return (m);
928 n->m_pkthdr.rcvif = (struct ifnet *)0;
929 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
930 m->m_len -= sizeof(struct ip);
931 m->m_data += sizeof(struct ip);
932 n->m_next = m;
933 m = n;
934 m->m_len = optlen + sizeof(struct ip);
935 m->m_data += max_linkhdr;
936 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
937 } else {
938 m->m_data -= optlen;
939 m->m_len += optlen;
940 m->m_pkthdr.len += optlen;
941 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
942 }
943 ip = mtod(m, struct ip *);
944 bcopy(p->ipopt_list, ip + 1, optlen);
945 *phlen = sizeof(struct ip) + optlen;
946 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
947 ip->ip_len += optlen;
948 return (m);
949}
950
951/*
952 * Copy options from ip to jp,
953 * omitting those not copied during fragmentation.
954 */
955int
956ip_optcopy(ip, jp)
957 struct ip *ip, *jp;
958{
959 register u_char *cp, *dp;
960 int opt, optlen, cnt;
961
962 cp = (u_char *)(ip + 1);
963 dp = (u_char *)(jp + 1);
964 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
965 for (; cnt > 0; cnt -= optlen, cp += optlen) {
966 opt = cp[0];
967 if (opt == IPOPT_EOL)
968 break;
969 if (opt == IPOPT_NOP) {
970 /* Preserve for IP mcast tunnel's LSRR alignment. */
971 *dp++ = IPOPT_NOP;
972 optlen = 1;
973 continue;
974 } else
975 optlen = cp[IPOPT_OLEN];
976 /* bogus lengths should have been caught by ip_dooptions */
977 if (optlen > cnt)
978 optlen = cnt;
979 if (IPOPT_COPIED(opt)) {
980 bcopy(cp, dp, optlen);
981 dp += optlen;
982 }
983 }
984 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
985 *dp++ = IPOPT_EOL;
986 return (optlen);
987}
988
989/*
990 * IP socket option processing.
991 */
992int
993ip_ctloutput(so, sopt)
994 struct socket *so;
995 struct sockopt *sopt;
996{
997 struct inpcb *inp = sotoinpcb(so);
998 int error, optval;
999
1000 error = optval = 0;
1001 if (sopt->sopt_level != IPPROTO_IP) {
1002 return (EINVAL);
1003 }
1004
1005 switch (sopt->sopt_dir) {
1006 case SOPT_SET:
1007 switch (sopt->sopt_name) {
1008 case IP_OPTIONS:
1009#ifdef notyet
1010 case IP_RETOPTS:
1011#endif
1012 {
1013 struct mbuf *m;
1014 if (sopt->sopt_valsize > MLEN) {
1015 error = EMSGSIZE;
1016 break;
1017 }
1018 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_HEADER);
1019 if (m == 0) {
1020 error = ENOBUFS;
1021 break;
1022 }
1023 m->m_len = sopt->sopt_valsize;
1024 error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1025 m->m_len);
1026
1027 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
1028 m));
1029 }
1030
1031 case IP_TOS:
1032 case IP_TTL:
1033 case IP_RECVOPTS:
1034 case IP_RECVRETOPTS:
1035 case IP_RECVDSTADDR:
1036 case IP_RECVIF:
1037#if defined(NFAITH) && NFAITH > 0
1038 case IP_FAITH:
1039#endif
1040 error = sooptcopyin(sopt, &optval, sizeof optval,
1041 sizeof optval);
1042 if (error)
1043 break;
1044
1045 switch (sopt->sopt_name) {
1046 case IP_TOS:
1047 inp->inp_ip_tos = optval;
1048 break;
1049
1050 case IP_TTL:
1051 inp->inp_ip_ttl = optval;
1052 break;
1053#define OPTSET(bit) \
1054 if (optval) \
1055 inp->inp_flags |= bit; \
1056 else \
1057 inp->inp_flags &= ~bit;
1058
1059 case IP_RECVOPTS:
1060 OPTSET(INP_RECVOPTS);
1061 break;
1062
1063 case IP_RECVRETOPTS:
1064 OPTSET(INP_RECVRETOPTS);
1065 break;
1066
1067 case IP_RECVDSTADDR:
1068 OPTSET(INP_RECVDSTADDR);
1069 break;
1070
1071 case IP_RECVIF:
1072 OPTSET(INP_RECVIF);
1073 break;
1074
1075#if defined(NFAITH) && NFAITH > 0
1076 case IP_FAITH:
1077 OPTSET(INP_FAITH);
1078 break;
1079#endif
1080 }
1081 break;
1082#undef OPTSET
1083
1084 case IP_MULTICAST_IF:
1085 case IP_MULTICAST_VIF:
1086 case IP_MULTICAST_TTL:
1087 case IP_MULTICAST_LOOP:
1088 case IP_ADD_MEMBERSHIP:
1089 case IP_DROP_MEMBERSHIP:
1090 error = ip_setmoptions(sopt, &inp->inp_moptions);
1091 break;
1092
1093 case IP_PORTRANGE:
1094 error = sooptcopyin(sopt, &optval, sizeof optval,
1095 sizeof optval);
1096 if (error)
1097 break;
1098
1099 switch (optval) {
1100 case IP_PORTRANGE_DEFAULT:
1101 inp->inp_flags &= ~(INP_LOWPORT);
1102 inp->inp_flags &= ~(INP_HIGHPORT);
1103 break;
1104
1105 case IP_PORTRANGE_HIGH:
1106 inp->inp_flags &= ~(INP_LOWPORT);
1107 inp->inp_flags |= INP_HIGHPORT;
1108 break;
1109
1110 case IP_PORTRANGE_LOW:
1111 inp->inp_flags &= ~(INP_HIGHPORT);
1112 inp->inp_flags |= INP_LOWPORT;
1113 break;
1114
1115 default:
1116 error = EINVAL;
1117 break;
1118 }
1119 break;
1120
1121#ifdef IPSEC
1122 case IP_IPSEC_POLICY:
1123 {
1124 caddr_t req;
1125 int priv;
1126 struct mbuf *m;
1127 int optname;
1128
1129 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1130 break;
1131 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1132 break;
1133 priv = (sopt->sopt_p != NULL &&
1134 suser(sopt->sopt_p) != 0) ? 0 : 1;
1135 req = mtod(m, caddr_t);
1136 optname = sopt->sopt_name;
1137 error = ipsec4_set_policy(inp, optname, req, priv);
1138 m_freem(m);
1139 break;
1140 }
1141#endif /*IPSEC*/
1142
1143 default:
1144 error = ENOPROTOOPT;
1145 break;
1146 }
1147 break;
1148
1149 case SOPT_GET:
1150 switch (sopt->sopt_name) {
1151 case IP_OPTIONS:
1152 case IP_RETOPTS:
1153 if (inp->inp_options)
1154 error = sooptcopyout(sopt,
1155 mtod(inp->inp_options,
1156 char *),
1157 inp->inp_options->m_len);
1158 else
1159 sopt->sopt_valsize = 0;
1160 break;
1161
1162 case IP_TOS:
1163 case IP_TTL:
1164 case IP_RECVOPTS:
1165 case IP_RECVRETOPTS:
1166 case IP_RECVDSTADDR:
1167 case IP_RECVIF:
1168 case IP_PORTRANGE:
1169#if defined(NFAITH) && NFAITH > 0
1170 case IP_FAITH:
1171#endif
1172 switch (sopt->sopt_name) {
1173
1174 case IP_TOS:
1175 optval = inp->inp_ip_tos;
1176 break;
1177
1178 case IP_TTL:
1179 optval = inp->inp_ip_ttl;
1180 break;
1181
1182#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1183
1184 case IP_RECVOPTS:
1185 optval = OPTBIT(INP_RECVOPTS);
1186 break;
1187
1188 case IP_RECVRETOPTS:
1189 optval = OPTBIT(INP_RECVRETOPTS);
1190 break;
1191
1192 case IP_RECVDSTADDR:
1193 optval = OPTBIT(INP_RECVDSTADDR);
1194 break;
1195
1196 case IP_RECVIF:
1197 optval = OPTBIT(INP_RECVIF);
1198 break;
1199
1200 case IP_PORTRANGE:
1201 if (inp->inp_flags & INP_HIGHPORT)
1202 optval = IP_PORTRANGE_HIGH;
1203 else if (inp->inp_flags & INP_LOWPORT)
1204 optval = IP_PORTRANGE_LOW;
1205 else
1206 optval = 0;
1207 break;
1208
1209#if defined(NFAITH) && NFAITH > 0
1210 case IP_FAITH:
1211 optval = OPTBIT(INP_FAITH);
1212 break;
1213#endif
1214 }
1215 error = sooptcopyout(sopt, &optval, sizeof optval);
1216 break;
1217
1218 case IP_MULTICAST_IF:
1219 case IP_MULTICAST_VIF:
1220 case IP_MULTICAST_TTL:
1221 case IP_MULTICAST_LOOP:
1222 case IP_ADD_MEMBERSHIP:
1223 case IP_DROP_MEMBERSHIP:
1224 error = ip_getmoptions(sopt, inp->inp_moptions);
1225 break;
1226
1227#ifdef IPSEC
1228 case IP_IPSEC_POLICY:
1229 {
1230 struct mbuf *m = NULL;
1231 caddr_t req = NULL;
1232
1233 if (m != 0)
1234 req = mtod(m, caddr_t);
1235 error = ipsec4_get_policy(sotoinpcb(so), req, &m);
1236 if (error == 0)
1237 error = soopt_mcopyout(sopt, m); /* XXX */
1238 if (error == 0)
1239 m_freem(m);
1240 break;
1241 }
1242#endif /*IPSEC*/
1243
1244 default:
1245 error = ENOPROTOOPT;
1246 break;
1247 }
1248 break;
1249 }
1250 return (error);
1251}
1252
1253/*
1254 * Set up IP options in pcb for insertion in output packets.
1255 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1256 * with destination address if source routed.
1257 */
1258static int
1259ip_pcbopts(optname, pcbopt, m)
1260 int optname;
1261 struct mbuf **pcbopt;
1262 register struct mbuf *m;
1263{
1264 register int cnt, optlen;
1265 register u_char *cp;
1266 u_char opt;
1267
1268 /* turn off any old options */
1269 if (*pcbopt)
1270 (void)m_free(*pcbopt);
1271 *pcbopt = 0;
1272 if (m == (struct mbuf *)0 || m->m_len == 0) {
1273 /*
1274 * Only turning off any previous options.
1275 */
1276 if (m)
1277 (void)m_free(m);
1278 return (0);
1279 }
1280
1281#ifndef vax
1282 if (m->m_len % sizeof(int32_t))
1283 goto bad;
1284#endif
1285 /*
1286 * IP first-hop destination address will be stored before
1287 * actual options; move other options back
1288 * and clear it when none present.
1289 */
1290 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1291 goto bad;
1292 cnt = m->m_len;
1293 m->m_len += sizeof(struct in_addr);
1294 cp = mtod(m, u_char *) + sizeof(struct in_addr);
1295 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
1296 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
1297
1298 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1299 opt = cp[IPOPT_OPTVAL];
1300 if (opt == IPOPT_EOL)
1301 break;
1302 if (opt == IPOPT_NOP)
1303 optlen = 1;
1304 else {
1305 optlen = cp[IPOPT_OLEN];
1306 if (optlen <= IPOPT_OLEN || optlen > cnt)
1307 goto bad;
1308 }
1309 switch (opt) {
1310
1311 default:
1312 break;
1313
1314 case IPOPT_LSRR:
1315 case IPOPT_SSRR:
1316 /*
1317 * user process specifies route as:
1318 * ->A->B->C->D
1319 * D must be our final destination (but we can't
1320 * check that since we may not have connected yet).
1321 * A is first hop destination, which doesn't appear in
1322 * actual IP option, but is stored before the options.
1323 */
1324 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1325 goto bad;
1326 m->m_len -= sizeof(struct in_addr);
1327 cnt -= sizeof(struct in_addr);
1328 optlen -= sizeof(struct in_addr);
1329 cp[IPOPT_OLEN] = optlen;
1330 /*
1331 * Move first hop before start of options.
1332 */
1333 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1334 sizeof(struct in_addr));
1335 /*
1336 * Then copy rest of options back
1337 * to close up the deleted entry.
1338 */
1339 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
1340 sizeof(struct in_addr)),
1341 (caddr_t)&cp[IPOPT_OFFSET+1],
1342 (unsigned)cnt + sizeof(struct in_addr));
1343 break;
1344 }
1345 }
1346 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1347 goto bad;
1348 *pcbopt = m;
1349 return (0);
1350
1351bad:
1352 (void)m_free(m);
1353 return (EINVAL);
1354}
1355
1356/*
1357 * XXX
1358 * The whole multicast option thing needs to be re-thought.
1359 * Several of these options are equally applicable to non-multicast
1360 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
1361 * standard option (IP_TTL).
1362 */
1363/*
1364 * Set the IP multicast options in response to user setsockopt().
1365 */
1366static int
1367ip_setmoptions(sopt, imop)
1368 struct sockopt *sopt;
1369 struct ip_moptions **imop;
1370{
1371 int error = 0;
1372 int i;
1373 struct in_addr addr;
1374 struct ip_mreq mreq;
1375 struct ifnet *ifp;
1376 struct ip_moptions *imo = *imop;
1377 struct route ro;
1378 struct sockaddr_in *dst;
1379 int s;
1380
1381 if (imo == NULL) {
1382 /*
1383 * No multicast option buffer attached to the pcb;
1384 * allocate one and initialize to default values.
1385 */
1386 imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
1387 M_WAITOK);
1388
1389 if (imo == NULL)
1390 return (ENOBUFS);
1391 *imop = imo;
1392 imo->imo_multicast_ifp = NULL;
1393 imo->imo_multicast_vif = -1;
1394 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1395 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1396 imo->imo_num_memberships = 0;
1397 }
1398
1399 switch (sopt->sopt_name) {
1400 /* store an index number for the vif you wanna use in the send */
1401 case IP_MULTICAST_VIF:
1402 if (legal_vif_num == 0) {
1403 error = EOPNOTSUPP;
1404 break;
1405 }
1406 error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
1407 if (error)
1408 break;
1409 if (!legal_vif_num(i) && (i != -1)) {
1410 error = EINVAL;
1411 break;
1412 }
1413 imo->imo_multicast_vif = i;
1414 break;
1415
1416 case IP_MULTICAST_IF:
1417 /*
1418 * Select the interface for outgoing multicast packets.
1419 */
1420 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
1421 if (error)
1422 break;
1423 /*
1424 * INADDR_ANY is used to remove a previous selection.
1425 * When no interface is selected, a default one is
1426 * chosen every time a multicast packet is sent.
1427 */
1428 if (addr.s_addr == INADDR_ANY) {
1429 imo->imo_multicast_ifp = NULL;
1430 break;
1431 }
1432 /*
1433 * The selected interface is identified by its local
1434 * IP address. Find the interface and confirm that
1435 * it supports multicasting.
1436 */
1437 s = splimp();
1438 INADDR_TO_IFP(addr, ifp);
1439 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1440 splx(s);
1441 error = EADDRNOTAVAIL;
1442 break;
1443 }
1444 imo->imo_multicast_ifp = ifp;
1445 splx(s);
1446 break;
1447
1448 case IP_MULTICAST_TTL:
1449 /*
1450 * Set the IP time-to-live for outgoing multicast packets.
1451 * The original multicast API required a char argument,
1452 * which is inconsistent with the rest of the socket API.
1453 * We allow either a char or an int.
1454 */
1455 if (sopt->sopt_valsize == 1) {
1456 u_char ttl;
1457 error = sooptcopyin(sopt, &ttl, 1, 1);
1458 if (error)
1459 break;
1460 imo->imo_multicast_ttl = ttl;
1461 } else {
1462 u_int ttl;
1463 error = sooptcopyin(sopt, &ttl, sizeof ttl,
1464 sizeof ttl);
1465 if (error)
1466 break;
1467 if (ttl > 255)
1468 error = EINVAL;
1469 else
1470 imo->imo_multicast_ttl = ttl;
1471 }
1472 break;
1473
1474 case IP_MULTICAST_LOOP:
1475 /*
1476 * Set the loopback flag for outgoing multicast packets.
1477 * Must be zero or one. The original multicast API required a
1478 * char argument, which is inconsistent with the rest
1479 * of the socket API. We allow either a char or an int.
1480 */
1481 if (sopt->sopt_valsize == 1) {
1482 u_char loop;
1483 error = sooptcopyin(sopt, &loop, 1, 1);
1484 if (error)
1485 break;
1486 imo->imo_multicast_loop = !!loop;
1487 } else {
1488 u_int loop;
1489 error = sooptcopyin(sopt, &loop, sizeof loop,
1490 sizeof loop);
1491 if (error)
1492 break;
1493 imo->imo_multicast_loop = !!loop;
1494 }
1495 break;
1496
1497 case IP_ADD_MEMBERSHIP:
1498 /*
1499 * Add a multicast group membership.
1500 * Group must be a valid IP multicast address.
1501 */
1502 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1503 if (error)
1504 break;
1505
1506 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1507 error = EINVAL;
1508 break;
1509 }
1510 s = splimp();
1511 /*
1512 * If no interface address was provided, use the interface of
1513 * the route to the given multicast address.
1514 */
1515 if (mreq.imr_interface.s_addr == INADDR_ANY) {
1516 bzero((caddr_t)&ro, sizeof(ro));
1517 dst = (struct sockaddr_in *)&ro.ro_dst;
1518 dst->sin_len = sizeof(*dst);
1519 dst->sin_family = AF_INET;
1520 dst->sin_addr = mreq.imr_multiaddr;
1521 rtalloc(&ro);
1522 if (ro.ro_rt == NULL) {
1523 error = EADDRNOTAVAIL;
1524 splx(s);
1525 break;
1526 }
1527 ifp = ro.ro_rt->rt_ifp;
1528 rtfree(ro.ro_rt);
1529 }
1530 else {
1531 INADDR_TO_IFP(mreq.imr_interface, ifp);
1532 }
1533
1534 /*
1535 * See if we found an interface, and confirm that it
1536 * supports multicast.
1537 */
1538 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1539 error = EADDRNOTAVAIL;
1540 splx(s);
1541 break;
1542 }
1543 /*
1544 * See if the membership already exists or if all the
1545 * membership slots are full.
1546 */
1547 for (i = 0; i < imo->imo_num_memberships; ++i) {
1548 if (imo->imo_membership[i]->inm_ifp == ifp &&
1549 imo->imo_membership[i]->inm_addr.s_addr
1550 == mreq.imr_multiaddr.s_addr)
1551 break;
1552 }
1553 if (i < imo->imo_num_memberships) {
1554 error = EADDRINUSE;
1555 splx(s);
1556 break;
1557 }
1558 if (i == IP_MAX_MEMBERSHIPS) {
1559 error = ETOOMANYREFS;
1560 splx(s);
1561 break;
1562 }
1563 /*
1564 * Everything looks good; add a new record to the multicast
1565 * address list for the given interface.
1566 */
1567 if ((imo->imo_membership[i] =
1568 in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
1569 error = ENOBUFS;
1570 splx(s);
1571 break;
1572 }
1573 ++imo->imo_num_memberships;
1574 splx(s);
1575 break;
1576
1577 case IP_DROP_MEMBERSHIP:
1578 /*
1579 * Drop a multicast group membership.
1580 * Group must be a valid IP multicast address.
1581 */
1582 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1583 if (error)
1584 break;
1585
1586 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1587 error = EINVAL;
1588 break;
1589 }
1590
1591 s = splimp();
1592 /*
1593 * If an interface address was specified, get a pointer
1594 * to its ifnet structure.
1595 */
1596 if (mreq.imr_interface.s_addr == INADDR_ANY)
1597 ifp = NULL;
1598 else {
1599 INADDR_TO_IFP(mreq.imr_interface, ifp);
1600 if (ifp == NULL) {
1601 error = EADDRNOTAVAIL;
1602 splx(s);
1603 break;
1604 }
1605 }
1606 /*
1607 * Find the membership in the membership array.
1608 */
1609 for (i = 0; i < imo->imo_num_memberships; ++i) {
1610 if ((ifp == NULL ||
1611 imo->imo_membership[i]->inm_ifp == ifp) &&
1612 imo->imo_membership[i]->inm_addr.s_addr ==
1613 mreq.imr_multiaddr.s_addr)
1614 break;
1615 }
1616 if (i == imo->imo_num_memberships) {
1617 error = EADDRNOTAVAIL;
1618 splx(s);
1619 break;
1620 }
1621 /*
1622 * Give up the multicast address record to which the
1623 * membership points.
1624 */
1625 in_delmulti(imo->imo_membership[i]);
1626 /*
1627 * Remove the gap in the membership array.
1628 */
1629 for (++i; i < imo->imo_num_memberships; ++i)
1630 imo->imo_membership[i-1] = imo->imo_membership[i];
1631 --imo->imo_num_memberships;
1632 splx(s);
1633 break;
1634
1635 default:
1636 error = EOPNOTSUPP;
1637 break;
1638 }
1639
1640 /*
1641 * If all options have default values, no need to keep the mbuf.
1642 */
1643 if (imo->imo_multicast_ifp == NULL &&
1644 imo->imo_multicast_vif == -1 &&
1645 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1646 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1647 imo->imo_num_memberships == 0) {
1648 free(*imop, M_IPMOPTS);
1649 *imop = NULL;
1650 }
1651
1652 return (error);
1653}
1654
1655/*
1656 * Return the IP multicast options in response to user getsockopt().
1657 */
1658static int
1659ip_getmoptions(sopt, imo)
1660 struct sockopt *sopt;
1661 register struct ip_moptions *imo;
1662{
1663 struct in_addr addr;
1664 struct in_ifaddr *ia;
1665 int error, optval;
1666 u_char coptval;
1667
1668 error = 0;
1669 switch (sopt->sopt_name) {
1670 case IP_MULTICAST_VIF:
1671 if (imo != NULL)
1672 optval = imo->imo_multicast_vif;
1673 else
1674 optval = -1;
1675 error = sooptcopyout(sopt, &optval, sizeof optval);
1676 break;
1677
1678 case IP_MULTICAST_IF:
1679 if (imo == NULL || imo->imo_multicast_ifp == NULL)
1680 addr.s_addr = INADDR_ANY;
1681 else {
1682 IFP_TO_IA(imo->imo_multicast_ifp, ia);
1683 addr.s_addr = (ia == NULL) ? INADDR_ANY
1684 : IA_SIN(ia)->sin_addr.s_addr;
1685 }
1686 error = sooptcopyout(sopt, &addr, sizeof addr);
1687 break;
1688
1689 case IP_MULTICAST_TTL:
1690 if (imo == 0)
1691 optval = coptval = IP_DEFAULT_MULTICAST_TTL;
1692 else
1693 optval = coptval = imo->imo_multicast_ttl;
1694 if (sopt->sopt_valsize == 1)
1695 error = sooptcopyout(sopt, &coptval, 1);
1696 else
1697 error = sooptcopyout(sopt, &optval, sizeof optval);
1698 break;
1699
1700 case IP_MULTICAST_LOOP:
1701 if (imo == 0)
1702 optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
1703 else
1704 optval = coptval = imo->imo_multicast_loop;
1705 if (sopt->sopt_valsize == 1)
1706 error = sooptcopyout(sopt, &coptval, 1);
1707 else
1708 error = sooptcopyout(sopt, &optval, sizeof optval);
1709 break;
1710
1711 default:
1712 error = ENOPROTOOPT;
1713 break;
1714 }
1715 return (error);
1716}
1717
1718/*
1719 * Discard the IP multicast options.
1720 */
1721void
1722ip_freemoptions(imo)
1723 register struct ip_moptions *imo;
1724{
1725 register int i;
1726
1727 if (imo != NULL) {
1728 for (i = 0; i < imo->imo_num_memberships; ++i)
1729 in_delmulti(imo->imo_membership[i]);
1730 free(imo, M_IPMOPTS);
1731 }
1732}
1733
1734/*
1735 * Routine called from ip_output() to loop back a copy of an IP multicast
1736 * packet to the input queue of a specified interface. Note that this
1737 * calls the output routine of the loopback "driver", but with an interface
1738 * pointer that might NOT be a loopback interface -- evil, but easier than
1739 * replicating that code here.
1740 */
1741static void
1742ip_mloopback(ifp, m, dst, hlen)
1743 struct ifnet *ifp;
1744 register struct mbuf *m;
1745 register struct sockaddr_in *dst;
1746 int hlen;
1747{
1748 register struct ip *ip;
1749 struct mbuf *copym;
1750
1751 copym = m_copy(m, 0, M_COPYALL);
1752 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
1753 copym = m_pullup(copym, hlen);
1754 if (copym != NULL) {
1755 /*
1756 * We don't bother to fragment if the IP length is greater
1757 * than the interface's MTU. Can this possibly matter?
1758 */
1759 ip = mtod(copym, struct ip *);
1760 ip->ip_len = htons((u_short)ip->ip_len);
1761 ip->ip_off = htons((u_short)ip->ip_off);
1762 ip->ip_sum = 0;
1763 if (ip->ip_vhl == IP_VHL_BORING) {
1764 ip->ip_sum = in_cksum_hdr(ip);
1765 } else {
1766 ip->ip_sum = in_cksum(copym, hlen);
1767 }
1768 /*
1769 * NB:
1770 * It's not clear whether there are any lingering
1771 * reentrancy problems in other areas which might
1772 * be exposed by using ip_input directly (in
1773 * particular, everything which modifies the packet
1774 * in-place). Yet another option is using the
1775 * protosw directly to deliver the looped back
1776 * packet. For the moment, we'll err on the side
1777 * of safety by using if_simloop().
1778 */
1779#if 1 /* XXX */
1780 if (dst->sin_family != AF_INET) {
1781 printf("ip_mloopback: bad address family %d\n",
1782 dst->sin_family);
1783 dst->sin_family = AF_INET;
1784 }
1785#endif
1786
1787#ifdef notdef
1788 copym->m_pkthdr.rcvif = ifp;
1789 ip_input(copym);
1790#else
1791 if_simloop(ifp, copym, (struct sockaddr *)dst, 0);
1792#endif
1793 }
1794}
971/*
972 * Insert IP options into preformed packet.
973 * Adjust IP destination as required for IP source routing,
974 * as indicated by a non-zero in_addr at the start of the options.
975 *
976 * XXX This routine assumes that the packet has no options in place.
977 */
978static struct mbuf *
979ip_insertoptions(m, opt, phlen)
980 register struct mbuf *m;
981 struct mbuf *opt;
982 int *phlen;
983{
984 register struct ipoption *p = mtod(opt, struct ipoption *);
985 struct mbuf *n;
986 register struct ip *ip = mtod(m, struct ip *);
987 unsigned optlen;
988
989 optlen = opt->m_len - sizeof(p->ipopt_dst);
990 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
991 return (m); /* XXX should fail */
992 if (p->ipopt_dst.s_addr)
993 ip->ip_dst = p->ipopt_dst;
994 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
995 MGETHDR(n, M_DONTWAIT, MT_HEADER);
996 if (n == 0)
997 return (m);
998 n->m_pkthdr.rcvif = (struct ifnet *)0;
999 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
1000 m->m_len -= sizeof(struct ip);
1001 m->m_data += sizeof(struct ip);
1002 n->m_next = m;
1003 m = n;
1004 m->m_len = optlen + sizeof(struct ip);
1005 m->m_data += max_linkhdr;
1006 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
1007 } else {
1008 m->m_data -= optlen;
1009 m->m_len += optlen;
1010 m->m_pkthdr.len += optlen;
1011 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
1012 }
1013 ip = mtod(m, struct ip *);
1014 bcopy(p->ipopt_list, ip + 1, optlen);
1015 *phlen = sizeof(struct ip) + optlen;
1016 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
1017 ip->ip_len += optlen;
1018 return (m);
1019}
1020
1021/*
1022 * Copy options from ip to jp,
1023 * omitting those not copied during fragmentation.
1024 */
1025int
1026ip_optcopy(ip, jp)
1027 struct ip *ip, *jp;
1028{
1029 register u_char *cp, *dp;
1030 int opt, optlen, cnt;
1031
1032 cp = (u_char *)(ip + 1);
1033 dp = (u_char *)(jp + 1);
1034 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
1035 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1036 opt = cp[0];
1037 if (opt == IPOPT_EOL)
1038 break;
1039 if (opt == IPOPT_NOP) {
1040 /* Preserve for IP mcast tunnel's LSRR alignment. */
1041 *dp++ = IPOPT_NOP;
1042 optlen = 1;
1043 continue;
1044 } else
1045 optlen = cp[IPOPT_OLEN];
1046 /* bogus lengths should have been caught by ip_dooptions */
1047 if (optlen > cnt)
1048 optlen = cnt;
1049 if (IPOPT_COPIED(opt)) {
1050 bcopy(cp, dp, optlen);
1051 dp += optlen;
1052 }
1053 }
1054 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1055 *dp++ = IPOPT_EOL;
1056 return (optlen);
1057}
1058
1059/*
1060 * IP socket option processing.
1061 */
1062int
1063ip_ctloutput(so, sopt)
1064 struct socket *so;
1065 struct sockopt *sopt;
1066{
1067 struct inpcb *inp = sotoinpcb(so);
1068 int error, optval;
1069
1070 error = optval = 0;
1071 if (sopt->sopt_level != IPPROTO_IP) {
1072 return (EINVAL);
1073 }
1074
1075 switch (sopt->sopt_dir) {
1076 case SOPT_SET:
1077 switch (sopt->sopt_name) {
1078 case IP_OPTIONS:
1079#ifdef notyet
1080 case IP_RETOPTS:
1081#endif
1082 {
1083 struct mbuf *m;
1084 if (sopt->sopt_valsize > MLEN) {
1085 error = EMSGSIZE;
1086 break;
1087 }
1088 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_HEADER);
1089 if (m == 0) {
1090 error = ENOBUFS;
1091 break;
1092 }
1093 m->m_len = sopt->sopt_valsize;
1094 error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1095 m->m_len);
1096
1097 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
1098 m));
1099 }
1100
1101 case IP_TOS:
1102 case IP_TTL:
1103 case IP_RECVOPTS:
1104 case IP_RECVRETOPTS:
1105 case IP_RECVDSTADDR:
1106 case IP_RECVIF:
1107#if defined(NFAITH) && NFAITH > 0
1108 case IP_FAITH:
1109#endif
1110 error = sooptcopyin(sopt, &optval, sizeof optval,
1111 sizeof optval);
1112 if (error)
1113 break;
1114
1115 switch (sopt->sopt_name) {
1116 case IP_TOS:
1117 inp->inp_ip_tos = optval;
1118 break;
1119
1120 case IP_TTL:
1121 inp->inp_ip_ttl = optval;
1122 break;
1123#define OPTSET(bit) \
1124 if (optval) \
1125 inp->inp_flags |= bit; \
1126 else \
1127 inp->inp_flags &= ~bit;
1128
1129 case IP_RECVOPTS:
1130 OPTSET(INP_RECVOPTS);
1131 break;
1132
1133 case IP_RECVRETOPTS:
1134 OPTSET(INP_RECVRETOPTS);
1135 break;
1136
1137 case IP_RECVDSTADDR:
1138 OPTSET(INP_RECVDSTADDR);
1139 break;
1140
1141 case IP_RECVIF:
1142 OPTSET(INP_RECVIF);
1143 break;
1144
1145#if defined(NFAITH) && NFAITH > 0
1146 case IP_FAITH:
1147 OPTSET(INP_FAITH);
1148 break;
1149#endif
1150 }
1151 break;
1152#undef OPTSET
1153
1154 case IP_MULTICAST_IF:
1155 case IP_MULTICAST_VIF:
1156 case IP_MULTICAST_TTL:
1157 case IP_MULTICAST_LOOP:
1158 case IP_ADD_MEMBERSHIP:
1159 case IP_DROP_MEMBERSHIP:
1160 error = ip_setmoptions(sopt, &inp->inp_moptions);
1161 break;
1162
1163 case IP_PORTRANGE:
1164 error = sooptcopyin(sopt, &optval, sizeof optval,
1165 sizeof optval);
1166 if (error)
1167 break;
1168
1169 switch (optval) {
1170 case IP_PORTRANGE_DEFAULT:
1171 inp->inp_flags &= ~(INP_LOWPORT);
1172 inp->inp_flags &= ~(INP_HIGHPORT);
1173 break;
1174
1175 case IP_PORTRANGE_HIGH:
1176 inp->inp_flags &= ~(INP_LOWPORT);
1177 inp->inp_flags |= INP_HIGHPORT;
1178 break;
1179
1180 case IP_PORTRANGE_LOW:
1181 inp->inp_flags &= ~(INP_HIGHPORT);
1182 inp->inp_flags |= INP_LOWPORT;
1183 break;
1184
1185 default:
1186 error = EINVAL;
1187 break;
1188 }
1189 break;
1190
1191#ifdef IPSEC
1192 case IP_IPSEC_POLICY:
1193 {
1194 caddr_t req;
1195 int priv;
1196 struct mbuf *m;
1197 int optname;
1198
1199 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1200 break;
1201 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1202 break;
1203 priv = (sopt->sopt_p != NULL &&
1204 suser(sopt->sopt_p) != 0) ? 0 : 1;
1205 req = mtod(m, caddr_t);
1206 optname = sopt->sopt_name;
1207 error = ipsec4_set_policy(inp, optname, req, priv);
1208 m_freem(m);
1209 break;
1210 }
1211#endif /*IPSEC*/
1212
1213 default:
1214 error = ENOPROTOOPT;
1215 break;
1216 }
1217 break;
1218
1219 case SOPT_GET:
1220 switch (sopt->sopt_name) {
1221 case IP_OPTIONS:
1222 case IP_RETOPTS:
1223 if (inp->inp_options)
1224 error = sooptcopyout(sopt,
1225 mtod(inp->inp_options,
1226 char *),
1227 inp->inp_options->m_len);
1228 else
1229 sopt->sopt_valsize = 0;
1230 break;
1231
1232 case IP_TOS:
1233 case IP_TTL:
1234 case IP_RECVOPTS:
1235 case IP_RECVRETOPTS:
1236 case IP_RECVDSTADDR:
1237 case IP_RECVIF:
1238 case IP_PORTRANGE:
1239#if defined(NFAITH) && NFAITH > 0
1240 case IP_FAITH:
1241#endif
1242 switch (sopt->sopt_name) {
1243
1244 case IP_TOS:
1245 optval = inp->inp_ip_tos;
1246 break;
1247
1248 case IP_TTL:
1249 optval = inp->inp_ip_ttl;
1250 break;
1251
1252#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1253
1254 case IP_RECVOPTS:
1255 optval = OPTBIT(INP_RECVOPTS);
1256 break;
1257
1258 case IP_RECVRETOPTS:
1259 optval = OPTBIT(INP_RECVRETOPTS);
1260 break;
1261
1262 case IP_RECVDSTADDR:
1263 optval = OPTBIT(INP_RECVDSTADDR);
1264 break;
1265
1266 case IP_RECVIF:
1267 optval = OPTBIT(INP_RECVIF);
1268 break;
1269
1270 case IP_PORTRANGE:
1271 if (inp->inp_flags & INP_HIGHPORT)
1272 optval = IP_PORTRANGE_HIGH;
1273 else if (inp->inp_flags & INP_LOWPORT)
1274 optval = IP_PORTRANGE_LOW;
1275 else
1276 optval = 0;
1277 break;
1278
1279#if defined(NFAITH) && NFAITH > 0
1280 case IP_FAITH:
1281 optval = OPTBIT(INP_FAITH);
1282 break;
1283#endif
1284 }
1285 error = sooptcopyout(sopt, &optval, sizeof optval);
1286 break;
1287
1288 case IP_MULTICAST_IF:
1289 case IP_MULTICAST_VIF:
1290 case IP_MULTICAST_TTL:
1291 case IP_MULTICAST_LOOP:
1292 case IP_ADD_MEMBERSHIP:
1293 case IP_DROP_MEMBERSHIP:
1294 error = ip_getmoptions(sopt, inp->inp_moptions);
1295 break;
1296
1297#ifdef IPSEC
1298 case IP_IPSEC_POLICY:
1299 {
1300 struct mbuf *m = NULL;
1301 caddr_t req = NULL;
1302
1303 if (m != 0)
1304 req = mtod(m, caddr_t);
1305 error = ipsec4_get_policy(sotoinpcb(so), req, &m);
1306 if (error == 0)
1307 error = soopt_mcopyout(sopt, m); /* XXX */
1308 if (error == 0)
1309 m_freem(m);
1310 break;
1311 }
1312#endif /*IPSEC*/
1313
1314 default:
1315 error = ENOPROTOOPT;
1316 break;
1317 }
1318 break;
1319 }
1320 return (error);
1321}
1322
1323/*
1324 * Set up IP options in pcb for insertion in output packets.
1325 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1326 * with destination address if source routed.
1327 */
1328static int
1329ip_pcbopts(optname, pcbopt, m)
1330 int optname;
1331 struct mbuf **pcbopt;
1332 register struct mbuf *m;
1333{
1334 register int cnt, optlen;
1335 register u_char *cp;
1336 u_char opt;
1337
1338 /* turn off any old options */
1339 if (*pcbopt)
1340 (void)m_free(*pcbopt);
1341 *pcbopt = 0;
1342 if (m == (struct mbuf *)0 || m->m_len == 0) {
1343 /*
1344 * Only turning off any previous options.
1345 */
1346 if (m)
1347 (void)m_free(m);
1348 return (0);
1349 }
1350
1351#ifndef vax
1352 if (m->m_len % sizeof(int32_t))
1353 goto bad;
1354#endif
1355 /*
1356 * IP first-hop destination address will be stored before
1357 * actual options; move other options back
1358 * and clear it when none present.
1359 */
1360 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1361 goto bad;
1362 cnt = m->m_len;
1363 m->m_len += sizeof(struct in_addr);
1364 cp = mtod(m, u_char *) + sizeof(struct in_addr);
1365 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
1366 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
1367
1368 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1369 opt = cp[IPOPT_OPTVAL];
1370 if (opt == IPOPT_EOL)
1371 break;
1372 if (opt == IPOPT_NOP)
1373 optlen = 1;
1374 else {
1375 optlen = cp[IPOPT_OLEN];
1376 if (optlen <= IPOPT_OLEN || optlen > cnt)
1377 goto bad;
1378 }
1379 switch (opt) {
1380
1381 default:
1382 break;
1383
1384 case IPOPT_LSRR:
1385 case IPOPT_SSRR:
1386 /*
1387 * user process specifies route as:
1388 * ->A->B->C->D
1389 * D must be our final destination (but we can't
1390 * check that since we may not have connected yet).
1391 * A is first hop destination, which doesn't appear in
1392 * actual IP option, but is stored before the options.
1393 */
1394 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1395 goto bad;
1396 m->m_len -= sizeof(struct in_addr);
1397 cnt -= sizeof(struct in_addr);
1398 optlen -= sizeof(struct in_addr);
1399 cp[IPOPT_OLEN] = optlen;
1400 /*
1401 * Move first hop before start of options.
1402 */
1403 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1404 sizeof(struct in_addr));
1405 /*
1406 * Then copy rest of options back
1407 * to close up the deleted entry.
1408 */
1409 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
1410 sizeof(struct in_addr)),
1411 (caddr_t)&cp[IPOPT_OFFSET+1],
1412 (unsigned)cnt + sizeof(struct in_addr));
1413 break;
1414 }
1415 }
1416 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1417 goto bad;
1418 *pcbopt = m;
1419 return (0);
1420
1421bad:
1422 (void)m_free(m);
1423 return (EINVAL);
1424}
1425
1426/*
1427 * XXX
1428 * The whole multicast option thing needs to be re-thought.
1429 * Several of these options are equally applicable to non-multicast
1430 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
1431 * standard option (IP_TTL).
1432 */
1433/*
1434 * Set the IP multicast options in response to user setsockopt().
1435 */
1436static int
1437ip_setmoptions(sopt, imop)
1438 struct sockopt *sopt;
1439 struct ip_moptions **imop;
1440{
1441 int error = 0;
1442 int i;
1443 struct in_addr addr;
1444 struct ip_mreq mreq;
1445 struct ifnet *ifp;
1446 struct ip_moptions *imo = *imop;
1447 struct route ro;
1448 struct sockaddr_in *dst;
1449 int s;
1450
1451 if (imo == NULL) {
1452 /*
1453 * No multicast option buffer attached to the pcb;
1454 * allocate one and initialize to default values.
1455 */
1456 imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
1457 M_WAITOK);
1458
1459 if (imo == NULL)
1460 return (ENOBUFS);
1461 *imop = imo;
1462 imo->imo_multicast_ifp = NULL;
1463 imo->imo_multicast_vif = -1;
1464 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1465 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1466 imo->imo_num_memberships = 0;
1467 }
1468
1469 switch (sopt->sopt_name) {
1470 /* store an index number for the vif you wanna use in the send */
1471 case IP_MULTICAST_VIF:
1472 if (legal_vif_num == 0) {
1473 error = EOPNOTSUPP;
1474 break;
1475 }
1476 error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
1477 if (error)
1478 break;
1479 if (!legal_vif_num(i) && (i != -1)) {
1480 error = EINVAL;
1481 break;
1482 }
1483 imo->imo_multicast_vif = i;
1484 break;
1485
1486 case IP_MULTICAST_IF:
1487 /*
1488 * Select the interface for outgoing multicast packets.
1489 */
1490 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
1491 if (error)
1492 break;
1493 /*
1494 * INADDR_ANY is used to remove a previous selection.
1495 * When no interface is selected, a default one is
1496 * chosen every time a multicast packet is sent.
1497 */
1498 if (addr.s_addr == INADDR_ANY) {
1499 imo->imo_multicast_ifp = NULL;
1500 break;
1501 }
1502 /*
1503 * The selected interface is identified by its local
1504 * IP address. Find the interface and confirm that
1505 * it supports multicasting.
1506 */
1507 s = splimp();
1508 INADDR_TO_IFP(addr, ifp);
1509 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1510 splx(s);
1511 error = EADDRNOTAVAIL;
1512 break;
1513 }
1514 imo->imo_multicast_ifp = ifp;
1515 splx(s);
1516 break;
1517
1518 case IP_MULTICAST_TTL:
1519 /*
1520 * Set the IP time-to-live for outgoing multicast packets.
1521 * The original multicast API required a char argument,
1522 * which is inconsistent with the rest of the socket API.
1523 * We allow either a char or an int.
1524 */
1525 if (sopt->sopt_valsize == 1) {
1526 u_char ttl;
1527 error = sooptcopyin(sopt, &ttl, 1, 1);
1528 if (error)
1529 break;
1530 imo->imo_multicast_ttl = ttl;
1531 } else {
1532 u_int ttl;
1533 error = sooptcopyin(sopt, &ttl, sizeof ttl,
1534 sizeof ttl);
1535 if (error)
1536 break;
1537 if (ttl > 255)
1538 error = EINVAL;
1539 else
1540 imo->imo_multicast_ttl = ttl;
1541 }
1542 break;
1543
1544 case IP_MULTICAST_LOOP:
1545 /*
1546 * Set the loopback flag for outgoing multicast packets.
1547 * Must be zero or one. The original multicast API required a
1548 * char argument, which is inconsistent with the rest
1549 * of the socket API. We allow either a char or an int.
1550 */
1551 if (sopt->sopt_valsize == 1) {
1552 u_char loop;
1553 error = sooptcopyin(sopt, &loop, 1, 1);
1554 if (error)
1555 break;
1556 imo->imo_multicast_loop = !!loop;
1557 } else {
1558 u_int loop;
1559 error = sooptcopyin(sopt, &loop, sizeof loop,
1560 sizeof loop);
1561 if (error)
1562 break;
1563 imo->imo_multicast_loop = !!loop;
1564 }
1565 break;
1566
1567 case IP_ADD_MEMBERSHIP:
1568 /*
1569 * Add a multicast group membership.
1570 * Group must be a valid IP multicast address.
1571 */
1572 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1573 if (error)
1574 break;
1575
1576 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1577 error = EINVAL;
1578 break;
1579 }
1580 s = splimp();
1581 /*
1582 * If no interface address was provided, use the interface of
1583 * the route to the given multicast address.
1584 */
1585 if (mreq.imr_interface.s_addr == INADDR_ANY) {
1586 bzero((caddr_t)&ro, sizeof(ro));
1587 dst = (struct sockaddr_in *)&ro.ro_dst;
1588 dst->sin_len = sizeof(*dst);
1589 dst->sin_family = AF_INET;
1590 dst->sin_addr = mreq.imr_multiaddr;
1591 rtalloc(&ro);
1592 if (ro.ro_rt == NULL) {
1593 error = EADDRNOTAVAIL;
1594 splx(s);
1595 break;
1596 }
1597 ifp = ro.ro_rt->rt_ifp;
1598 rtfree(ro.ro_rt);
1599 }
1600 else {
1601 INADDR_TO_IFP(mreq.imr_interface, ifp);
1602 }
1603
1604 /*
1605 * See if we found an interface, and confirm that it
1606 * supports multicast.
1607 */
1608 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1609 error = EADDRNOTAVAIL;
1610 splx(s);
1611 break;
1612 }
1613 /*
1614 * See if the membership already exists or if all the
1615 * membership slots are full.
1616 */
1617 for (i = 0; i < imo->imo_num_memberships; ++i) {
1618 if (imo->imo_membership[i]->inm_ifp == ifp &&
1619 imo->imo_membership[i]->inm_addr.s_addr
1620 == mreq.imr_multiaddr.s_addr)
1621 break;
1622 }
1623 if (i < imo->imo_num_memberships) {
1624 error = EADDRINUSE;
1625 splx(s);
1626 break;
1627 }
1628 if (i == IP_MAX_MEMBERSHIPS) {
1629 error = ETOOMANYREFS;
1630 splx(s);
1631 break;
1632 }
1633 /*
1634 * Everything looks good; add a new record to the multicast
1635 * address list for the given interface.
1636 */
1637 if ((imo->imo_membership[i] =
1638 in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
1639 error = ENOBUFS;
1640 splx(s);
1641 break;
1642 }
1643 ++imo->imo_num_memberships;
1644 splx(s);
1645 break;
1646
1647 case IP_DROP_MEMBERSHIP:
1648 /*
1649 * Drop a multicast group membership.
1650 * Group must be a valid IP multicast address.
1651 */
1652 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1653 if (error)
1654 break;
1655
1656 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1657 error = EINVAL;
1658 break;
1659 }
1660
1661 s = splimp();
1662 /*
1663 * If an interface address was specified, get a pointer
1664 * to its ifnet structure.
1665 */
1666 if (mreq.imr_interface.s_addr == INADDR_ANY)
1667 ifp = NULL;
1668 else {
1669 INADDR_TO_IFP(mreq.imr_interface, ifp);
1670 if (ifp == NULL) {
1671 error = EADDRNOTAVAIL;
1672 splx(s);
1673 break;
1674 }
1675 }
1676 /*
1677 * Find the membership in the membership array.
1678 */
1679 for (i = 0; i < imo->imo_num_memberships; ++i) {
1680 if ((ifp == NULL ||
1681 imo->imo_membership[i]->inm_ifp == ifp) &&
1682 imo->imo_membership[i]->inm_addr.s_addr ==
1683 mreq.imr_multiaddr.s_addr)
1684 break;
1685 }
1686 if (i == imo->imo_num_memberships) {
1687 error = EADDRNOTAVAIL;
1688 splx(s);
1689 break;
1690 }
1691 /*
1692 * Give up the multicast address record to which the
1693 * membership points.
1694 */
1695 in_delmulti(imo->imo_membership[i]);
1696 /*
1697 * Remove the gap in the membership array.
1698 */
1699 for (++i; i < imo->imo_num_memberships; ++i)
1700 imo->imo_membership[i-1] = imo->imo_membership[i];
1701 --imo->imo_num_memberships;
1702 splx(s);
1703 break;
1704
1705 default:
1706 error = EOPNOTSUPP;
1707 break;
1708 }
1709
1710 /*
1711 * If all options have default values, no need to keep the mbuf.
1712 */
1713 if (imo->imo_multicast_ifp == NULL &&
1714 imo->imo_multicast_vif == -1 &&
1715 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1716 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1717 imo->imo_num_memberships == 0) {
1718 free(*imop, M_IPMOPTS);
1719 *imop = NULL;
1720 }
1721
1722 return (error);
1723}
1724
1725/*
1726 * Return the IP multicast options in response to user getsockopt().
1727 */
1728static int
1729ip_getmoptions(sopt, imo)
1730 struct sockopt *sopt;
1731 register struct ip_moptions *imo;
1732{
1733 struct in_addr addr;
1734 struct in_ifaddr *ia;
1735 int error, optval;
1736 u_char coptval;
1737
1738 error = 0;
1739 switch (sopt->sopt_name) {
1740 case IP_MULTICAST_VIF:
1741 if (imo != NULL)
1742 optval = imo->imo_multicast_vif;
1743 else
1744 optval = -1;
1745 error = sooptcopyout(sopt, &optval, sizeof optval);
1746 break;
1747
1748 case IP_MULTICAST_IF:
1749 if (imo == NULL || imo->imo_multicast_ifp == NULL)
1750 addr.s_addr = INADDR_ANY;
1751 else {
1752 IFP_TO_IA(imo->imo_multicast_ifp, ia);
1753 addr.s_addr = (ia == NULL) ? INADDR_ANY
1754 : IA_SIN(ia)->sin_addr.s_addr;
1755 }
1756 error = sooptcopyout(sopt, &addr, sizeof addr);
1757 break;
1758
1759 case IP_MULTICAST_TTL:
1760 if (imo == 0)
1761 optval = coptval = IP_DEFAULT_MULTICAST_TTL;
1762 else
1763 optval = coptval = imo->imo_multicast_ttl;
1764 if (sopt->sopt_valsize == 1)
1765 error = sooptcopyout(sopt, &coptval, 1);
1766 else
1767 error = sooptcopyout(sopt, &optval, sizeof optval);
1768 break;
1769
1770 case IP_MULTICAST_LOOP:
1771 if (imo == 0)
1772 optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
1773 else
1774 optval = coptval = imo->imo_multicast_loop;
1775 if (sopt->sopt_valsize == 1)
1776 error = sooptcopyout(sopt, &coptval, 1);
1777 else
1778 error = sooptcopyout(sopt, &optval, sizeof optval);
1779 break;
1780
1781 default:
1782 error = ENOPROTOOPT;
1783 break;
1784 }
1785 return (error);
1786}
1787
1788/*
1789 * Discard the IP multicast options.
1790 */
1791void
1792ip_freemoptions(imo)
1793 register struct ip_moptions *imo;
1794{
1795 register int i;
1796
1797 if (imo != NULL) {
1798 for (i = 0; i < imo->imo_num_memberships; ++i)
1799 in_delmulti(imo->imo_membership[i]);
1800 free(imo, M_IPMOPTS);
1801 }
1802}
1803
1804/*
1805 * Routine called from ip_output() to loop back a copy of an IP multicast
1806 * packet to the input queue of a specified interface. Note that this
1807 * calls the output routine of the loopback "driver", but with an interface
1808 * pointer that might NOT be a loopback interface -- evil, but easier than
1809 * replicating that code here.
1810 */
1811static void
1812ip_mloopback(ifp, m, dst, hlen)
1813 struct ifnet *ifp;
1814 register struct mbuf *m;
1815 register struct sockaddr_in *dst;
1816 int hlen;
1817{
1818 register struct ip *ip;
1819 struct mbuf *copym;
1820
1821 copym = m_copy(m, 0, M_COPYALL);
1822 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
1823 copym = m_pullup(copym, hlen);
1824 if (copym != NULL) {
1825 /*
1826 * We don't bother to fragment if the IP length is greater
1827 * than the interface's MTU. Can this possibly matter?
1828 */
1829 ip = mtod(copym, struct ip *);
1830 ip->ip_len = htons((u_short)ip->ip_len);
1831 ip->ip_off = htons((u_short)ip->ip_off);
1832 ip->ip_sum = 0;
1833 if (ip->ip_vhl == IP_VHL_BORING) {
1834 ip->ip_sum = in_cksum_hdr(ip);
1835 } else {
1836 ip->ip_sum = in_cksum(copym, hlen);
1837 }
1838 /*
1839 * NB:
1840 * It's not clear whether there are any lingering
1841 * reentrancy problems in other areas which might
1842 * be exposed by using ip_input directly (in
1843 * particular, everything which modifies the packet
1844 * in-place). Yet another option is using the
1845 * protosw directly to deliver the looped back
1846 * packet. For the moment, we'll err on the side
1847 * of safety by using if_simloop().
1848 */
1849#if 1 /* XXX */
1850 if (dst->sin_family != AF_INET) {
1851 printf("ip_mloopback: bad address family %d\n",
1852 dst->sin_family);
1853 dst->sin_family = AF_INET;
1854 }
1855#endif
1856
1857#ifdef notdef
1858 copym->m_pkthdr.rcvif = ifp;
1859 ip_input(copym);
1860#else
1861 if_simloop(ifp, copym, (struct sockaddr *)dst, 0);
1862#endif
1863 }
1864}