Deleted Added
full compact
ip_input.c (67334) ip_input.c (67609)
1/*
2 * Copyright (c) 1982, 1986, 1988, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)ip_input.c 8.2 (Berkeley) 1/4/94
1/*
2 * Copyright (c) 1982, 1986, 1988, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)ip_input.c 8.2 (Berkeley) 1/4/94
34 * $FreeBSD: head/sys/netinet/ip_input.c 67334 2000-10-19 23:15:54Z joe $
34 * $FreeBSD: head/sys/netinet/ip_input.c 67609 2000-10-26 12:18:13Z ru $
35 */
36
37#define _IP_VHL
38
39#include "opt_bootp.h"
40#include "opt_ipfw.h"
41#include "opt_ipdn.h"
42#include "opt_ipdivert.h"
43#include "opt_ipfilter.h"
44#include "opt_ipstealth.h"
45#include "opt_ipsec.h"
46#include "opt_pfil_hooks.h"
47
48#include <stddef.h>
49
50#include <sys/param.h>
51#include <sys/systm.h>
52#include <sys/mbuf.h>
53#include <sys/malloc.h>
54#include <sys/domain.h>
55#include <sys/protosw.h>
56#include <sys/socket.h>
57#include <sys/time.h>
58#include <sys/kernel.h>
59#include <sys/syslog.h>
60#include <sys/sysctl.h>
61
62#include <net/pfil.h>
63#include <net/if.h>
64#include <net/if_var.h>
65#include <net/if_dl.h>
66#include <net/route.h>
67#include <net/netisr.h>
68#include <net/intrq.h>
69
70#include <netinet/in.h>
71#include <netinet/in_systm.h>
72#include <netinet/in_var.h>
73#include <netinet/ip.h>
74#include <netinet/in_pcb.h>
75#include <netinet/ip_var.h>
76#include <netinet/ip_icmp.h>
77#include <machine/in_cksum.h>
78
79#include <netinet/ipprotosw.h>
80
81#include <sys/socketvar.h>
82
83#include <netinet/ip_fw.h>
84
85#ifdef IPSEC
86#include <netinet6/ipsec.h>
87#include <netkey/key.h>
88#endif
89
90#include "faith.h"
91#if defined(NFAITH) && NFAITH > 0
92#include <net/if_types.h>
93#endif
94
95#ifdef DUMMYNET
96#include <netinet/ip_dummynet.h>
97#endif
98
/*
 * RSVP state.  The visible part of ip_input() tests rsvp_on before claiming
 * IPPROTO_RSVP packets; ip_rsvp_on and ip_rsvpd are only defined here and
 * have no user in this excerpt.
 */
99int rsvp_on = 0;
100static int ip_rsvp_on;
101struct socket *ip_rsvpd;
102
/*
 * Run-time tunables.  Each SYSCTL_INT below attaches the variable defined
 * just before it to the _net_inet_ip node, together with an access flag
 * and a human-readable description string.
 */
/* ip_input() only calls ip_forward() when this is non-zero. */
103int ipforwarding = 0;
104SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW,
105 &ipforwarding, 0, "Enable IP forwarding between interfaces");
106
107static int ipsendredirects = 1; /* XXX */
108SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
109 &ipsendredirects, 0, "Enable sending IP redirects");
110
/*
 * NOTE(review): the variable is initialised from IPDEFTTL (a default),
 * while the description string calls it a maximum -- confirm the wording.
 */
111int ip_defttl = IPDEFTTL;
112SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
113 &ip_defttl, 0, "Maximum TTL on IP packets");
114
115static int ip_dosourceroute = 0;
116SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
117 &ip_dosourceroute, 0, "Enable forwarding source routed IP packets");
118
119static int ip_acceptsourceroute = 0;
120SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
121 CTLFLAG_RW, &ip_acceptsourceroute, 0,
122 "Enable accepting source routed IP packets");
123
/* Consulted by ip_input() for packets received on an IFT_FAITH interface. */
124static int ip_keepfaith = 0;
125SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
126 &ip_keepfaith, 0,
127 "Enable packet capture for FAITH IPv4->IPv6 translater daemon");
128
/* Only defined when DIAGNOSTIC is set; no use is visible in this excerpt. */
129#ifdef DIAGNOSTIC
130static int ipprintfs = 0;
131#endif
132
/*
 * inetdomain and inetsw[] are defined elsewhere.  ip_init() walks the
 * domain's protocol switch and records indices into inetsw[] in
 * ip_protox[], which is indexed by IP protocol number.
 */
133extern struct domain inetdomain;
134extern struct ipprotosw inetsw[];
135u_char ip_protox[IPPROTO_MAX];
/* Copied into ipintrq.ifq_maxlen by ip_init(). */
136static int ipqmaxlen = IFQ_MAXLEN;
137struct in_ifaddrhead in_ifaddrhead; /* first inet address */
/*
 * ipintrq is not declared in this file; presumably it comes from
 * <net/intrq.h> -- TODO confirm.  Its length limit is writable and its
 * drop counter is read-only.
 */
138SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW,
139 &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue");
140SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD,
141 &ipintrq.ifq_drops, 0, "Number of packets dropped from the IP input queue");
142
/* Counters bumped throughout ip_input(); exported read-only as a struct. */
143struct ipstat ipstat;
144SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RD,
145 &ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)");
146
147/*
 * Packet reassembly stuff.
 *
 * Fragment queues hang off a table of IPREASS_NHASH (1 << 6) bucket heads.
 * IPREASS_HASH(x,y) takes bits 0-3 of x, places bits 8-11 of x above them,
 * XORs the result with y and masks it down to a bucket index; ip_input()
 * calls it with the source address and the IP id.
 */
148#define IPREASS_NHASH_LOG2 6
149#define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2)
150#define IPREASS_HMASK (IPREASS_NHASH - 1)
151#define IPREASS_HASH(x,y) \
152 (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)
153
/* Bucket heads; ip_init() links each one to itself. */
154static struct ipq ipq[IPREASS_NHASH];
155static int nipq = 0; /* total # of reass queues */
/* Set by ip_init(); ip_input() frees an old queue once nipq exceeds it. */
156static int maxnipq;
/* Constant flag fixed at 1; its consumers are outside this excerpt. */
157const int ipintrq_present = 1;
158
/*
 * NOTE(review): ip_mtu is not declared anywhere in the visible source, so
 * this only compiles if both IPCTL_DEFMTU and ip_mtu are provided elsewhere.
 */
159#ifdef IPCTL_DEFMTU
160SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
161 &ip_mtu, 0, "Default MTU");
162#endif
163
/* Optional knob, registered with an empty description string. */
164#ifdef IPSTEALTH
165static int ipstealth = 0;
166SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
167 &ipstealth, 0, "");
168#endif
169
170
171/*
 * Firewall hooks.  ip_input() calls *ip_fw_chk_ptr only when fw_enable is
 * non-zero and the pointer is non-NULL; ip_fw_ctl_ptr has no user in the
 * visible code.
 */
172ip_fw_chk_t *ip_fw_chk_ptr;
173ip_fw_ctl_t *ip_fw_ctl_ptr;
174int fw_enable = 1 ;
175
/* Dummynet control hook; only present when DUMMYNET is configured. */
176#ifdef DUMMYNET
177ip_dn_ctl_t *ip_dn_ctl_ptr;
178#endif
179
180
181/*
182 * We need to save the IP options in case a protocol wants to respond
183 * to an incoming packet over the same route if the packet got here
184 * using IP source routing. This allows connection establishment and
185 * maintenance when the remote end is on a network that is not known
186 * to us.
 *
 * ip_nhops is reset to 0 by ip_input() before options are processed.
 * The route[] array holds MAX_IPOPTLEN / sizeof(struct in_addr) entries.
187 */
188static int ip_nhops = 0;
189static struct ip_srcrt {
190 struct in_addr dst; /* final destination */
191 char nop; /* one NOP to align */
192 char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */
193 struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
194} ip_srcrt;
195
/*
 * Forwarding address chosen by the firewall.  ip_input() passes its
 * address to the firewall check, skips the firewall when it is already
 * set (IPFIREWALL_FORWARD), and resets it to NULL on several exit paths.
 */
196struct sockaddr_in *ip_fw_fwd_addr;
197
/*
 * Forward declarations of the file-local helpers.  With IPDIVERT defined,
 * ip_reass() takes two extra pointer arguments; ip_input() passes the
 * addresses of its divert_info and divert_cookie locals there.
 */
198static void save_rte __P((u_char *, struct in_addr));
199static int ip_dooptions __P((struct mbuf *));
200static void ip_forward __P((struct mbuf *, int));
201static void ip_freef __P((struct ipq *));
202#ifdef IPDIVERT
203static struct mbuf *ip_reass __P((struct mbuf *,
204 struct ipq *, struct ipq *, u_int32_t *, u_int16_t *));
205#else
206static struct mbuf *ip_reass __P((struct mbuf *, struct ipq *, struct ipq *));
207#endif
208static struct in_ifaddr *ip_rtaddr __P((struct in_addr));
209static void ipintr __P((void));
210
211/*
212 * IP initialization: fill in IP protocol switch table.
213 * All protocols not implemented in kernel go to raw IP protocol handler.
 *
 * Besides building ip_protox[], this initialises the interface address
 * list head and every reassembly hash bucket, sets the reassembly queue
 * limit, seeds ip_id, sets the input queue length and registers ipintr()
 * for NETISR_IP.  Takes no arguments and returns nothing; panics if no
 * raw IP protocol entry can be found.
214 */
215void
216ip_init()
217{
218 register struct ipprotosw *pr;
219 register int i;
220
221 TAILQ_INIT(&in_ifaddrhead);
 /* The raw IP entry is the fallback handler, so it has to exist. */
222 pr = (struct ipprotosw *)pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
223 if (pr == 0)
224 panic("ip_init");
 /* Start with every protocol number mapped to the raw entry's index. */
225 for (i = 0; i < IPPROTO_MAX; i++)
226 ip_protox[i] = pr - inetsw;
 /*
  * Then let each PF_INET entry with a non-zero, non-raw protocol
  * number claim its own slot.
  * NOTE(review): pr_protocol is used as an index without being
  * compared against IPPROTO_MAX, and the index is stored in a
  * u_char -- confirm both ranges are guaranteed by inetsw[].
  */
227 for (pr = (struct ipprotosw *)inetdomain.dom_protosw;
228 pr < (struct ipprotosw *)inetdomain.dom_protoswNPROTOSW; pr++)
229 if (pr->pr_domain->dom_family == PF_INET &&
230 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
231 ip_protox[pr->pr_protocol] = pr - inetsw;
232
 /* An empty bucket is a head whose next and prev point at itself. */
233 for (i = 0; i < IPREASS_NHASH; i++)
234 ipq[i].next = ipq[i].prev = &ipq[i];
235
 /* Reassembly queue limit: one quarter of nmbclusters. */
236 maxnipq = nmbclusters/4;
237
 /* Seed ip_id with the low 16 bits of time_second. */
238 ip_id = time_second & 0xffff;
239 ipintrq.ifq_maxlen = ipqmaxlen;
240
 /* Hand ipintr() to the netisr layer as the NETISR_IP handler. */
241 register_netisr(NETISR_IP, ipintr);
242}
243
/*
 * File-scope scratch objects.  ipaddr is pre-filled with its own size and
 * AF_INET in its first two members (presumably sin_len and sin_family --
 * TODO confirm against struct sockaddr_in).  Neither object is used in
 * the visible part of ip_input().
 */
244static struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
245static struct route ipforward_rt;
246
247/*
248 * Ip input routine. Checksum and byte swap header. If fragmented
249 * try to reassemble. Process options. Pass to next level.
250 */
251void
252ip_input(struct mbuf *m)
253{
254 struct ip *ip;
255 struct ipq *fp;
256 struct in_ifaddr *ia = NULL;
257 int i, hlen, mff;
258 u_short sum;
259 u_int16_t divert_cookie; /* firewall cookie */
260#ifdef IPDIVERT
261 u_int32_t divert_info = 0; /* packet divert/tee info */
262#endif
263 struct ip_fw_chain *rule = NULL;
264#ifdef PFIL_HOOKS
265 struct packet_filter_hook *pfh;
266 struct mbuf *m0;
267 int rv;
268#endif /* PFIL_HOOKS */
269
270#ifdef IPDIVERT
271 /* Get and reset firewall cookie */
272 divert_cookie = ip_divert_cookie;
273 ip_divert_cookie = 0;
274#else
275 divert_cookie = 0;
276#endif
277
278#if defined(IPFIREWALL) && defined(DUMMYNET)
279 /*
280 * dummynet packet are prepended a vestigial mbuf with
281 * m_type = MT_DUMMYNET and m_data pointing to the matching
282 * rule.
283 */
284 if (m->m_type == MT_DUMMYNET) {
285 rule = (struct ip_fw_chain *)(m->m_data) ;
286 m = m->m_next ;
287 ip = mtod(m, struct ip *);
288 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
289 goto iphack ;
290 } else
291 rule = NULL ;
292#endif
293
294#ifdef DIAGNOSTIC
295 if (m == NULL || (m->m_flags & M_PKTHDR) == 0)
296 panic("ip_input no HDR");
297#endif
298 ipstat.ips_total++;
299
300 if (m->m_pkthdr.len < sizeof(struct ip))
301 goto tooshort;
302
303 if (m->m_len < sizeof (struct ip) &&
304 (m = m_pullup(m, sizeof (struct ip))) == 0) {
305 ipstat.ips_toosmall++;
306 return;
307 }
308 ip = mtod(m, struct ip *);
309
310 if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
311 ipstat.ips_badvers++;
312 goto bad;
313 }
314
315 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
316 if (hlen < sizeof(struct ip)) { /* minimum header length */
317 ipstat.ips_badhlen++;
318 goto bad;
319 }
320 if (hlen > m->m_len) {
321 if ((m = m_pullup(m, hlen)) == 0) {
322 ipstat.ips_badhlen++;
323 return;
324 }
325 ip = mtod(m, struct ip *);
326 }
327 if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
328 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
329 } else {
330 if (hlen == sizeof(struct ip)) {
331 sum = in_cksum_hdr(ip);
332 } else {
333 sum = in_cksum(m, hlen);
334 }
335 }
336 if (sum) {
337 ipstat.ips_badsum++;
338 goto bad;
339 }
340
341 /*
342 * Convert fields to host representation.
343 */
344 NTOHS(ip->ip_len);
345 if (ip->ip_len < hlen) {
346 ipstat.ips_badlen++;
347 goto bad;
348 }
349 NTOHS(ip->ip_off);
350
351 /*
352 * Check that the amount of data in the buffers
353 * is as at least much as the IP header would have us expect.
354 * Trim mbufs if longer than we expect.
355 * Drop packet if shorter than we expect.
356 */
357 if (m->m_pkthdr.len < ip->ip_len) {
358tooshort:
359 ipstat.ips_tooshort++;
360 goto bad;
361 }
362 if (m->m_pkthdr.len > ip->ip_len) {
363 if (m->m_len == m->m_pkthdr.len) {
364 m->m_len = ip->ip_len;
365 m->m_pkthdr.len = ip->ip_len;
366 } else
367 m_adj(m, ip->ip_len - m->m_pkthdr.len);
368 }
369 /*
370 * IpHack's section.
371 * Right now when no processing on packet has done
372 * and it is still fresh out of network we do our black
373 * deals with it.
374 * - Firewall: deny/allow/divert
375 * - Xlate: translate packet's addr/port (NAT).
376 * - Pipe: pass pkt through dummynet.
377 * - Wrap: fake packet's addr/port <unimpl.>
378 * - Encapsulate: put it in another IP and send out. <unimp.>
379 */
380
381#if defined(IPFIREWALL) && defined(DUMMYNET)
382iphack:
383#endif
384
385#ifdef PFIL_HOOKS
386 /*
387 * Run through list of hooks for input packets. If there are any
388 * filters which require that additional packets in the flow are
389 * not fast-forwarded, they must clear the M_CANFASTFWD flag.
390 * Note that filters must _never_ set this flag, as another filter
391 * in the list may have previously cleared it.
392 */
393 m0 = m;
394 pfh = pfil_hook_get(PFIL_IN, &inetsw[ip_protox[IPPROTO_IP]].pr_pfh);
395 for (; pfh; pfh = pfh->pfil_link.tqe_next)
396 if (pfh->pfil_func) {
397 rv = pfh->pfil_func(ip, hlen,
398 m->m_pkthdr.rcvif, 0, &m0);
399 if (rv)
400 return;
401 m = m0;
402 if (m == NULL)
403 return;
404 ip = mtod(m, struct ip *);
405 }
406#endif /* PFIL_HOOKS */
407
408 if (fw_enable && ip_fw_chk_ptr) {
409#ifdef IPFIREWALL_FORWARD
410 /*
411 * If we've been forwarded from the output side, then
412 * skip the firewall a second time
413 */
414 if (ip_fw_fwd_addr)
415 goto ours;
416#endif /* IPFIREWALL_FORWARD */
417 /*
418 * See the comment in ip_output for the return values
419 * produced by the firewall.
420 */
421 i = (*ip_fw_chk_ptr)(&ip,
422 hlen, NULL, &divert_cookie, &m, &rule, &ip_fw_fwd_addr);
423 if (m == NULL) /* Packet discarded by firewall */
424 return;
425 if (i == 0 && ip_fw_fwd_addr == NULL) /* common case */
426 goto pass;
427#ifdef DUMMYNET
428 if ((i & IP_FW_PORT_DYNT_FLAG) != 0) {
429 /* Send packet to the appropriate pipe */
430 dummynet_io(i&0xffff,DN_TO_IP_IN,m,NULL,NULL,0, rule,
431 0);
432 return;
433 }
434#endif
435#ifdef IPDIVERT
436 if (i != 0 && (i & IP_FW_PORT_DYNT_FLAG) == 0) {
437 /* Divert or tee packet */
438 divert_info = i;
439 goto ours;
440 }
441#endif
442#ifdef IPFIREWALL_FORWARD
443 if (i == 0 && ip_fw_fwd_addr != NULL)
444 goto pass;
445#endif
446 /*
447 * if we get here, the packet must be dropped
448 */
449 m_freem(m);
450 return;
451 }
452pass:
453
454 /*
455 * Process options and, if not destined for us,
456 * ship it on. ip_dooptions returns 1 when an
457 * error was detected (causing an icmp message
458 * to be sent and the original packet to be freed).
459 */
460 ip_nhops = 0; /* for source routed packets */
461 if (hlen > sizeof (struct ip) && ip_dooptions(m)) {
462#ifdef IPFIREWALL_FORWARD
463 ip_fw_fwd_addr = NULL;
464#endif
465 return;
466 }
467
468 /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
469 * matter if it is destined to another node, or whether it is
470 * a multicast one, RSVP wants it! and prevents it from being forwarded
471 * anywhere else. Also checks if the rsvp daemon is running before
472 * grabbing the packet.
473 */
474 if (rsvp_on && ip->ip_p==IPPROTO_RSVP)
475 goto ours;
476
477 /*
478 * Check our list of addresses, to see if the packet is for us.
479 * If we don't have any addresses, assume any unicast packet
480 * we receive might be for us (and let the upper layers deal
481 * with it).
482 */
483 if (TAILQ_EMPTY(&in_ifaddrhead) &&
484 (m->m_flags & (M_MCAST|M_BCAST)) == 0)
485 goto ours;
486
487 for (ia = TAILQ_FIRST(&in_ifaddrhead); ia;
488 ia = TAILQ_NEXT(ia, ia_link)) {
489#define satosin(sa) ((struct sockaddr_in *)(sa))
490
491#ifdef BOOTP_COMPAT
492 if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY)
493 goto ours;
494#endif
495#ifdef IPFIREWALL_FORWARD
496 /*
497 * If the addr to forward to is one of ours, we pretend to
498 * be the destination for this packet.
499 */
500 if (ip_fw_fwd_addr == NULL) {
501 if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr)
502 goto ours;
503 } else if (IA_SIN(ia)->sin_addr.s_addr ==
504 ip_fw_fwd_addr->sin_addr.s_addr)
505 goto ours;
506#else
507 if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr)
508 goto ours;
509#endif
510 if (ia->ia_ifp && ia->ia_ifp->if_flags & IFF_BROADCAST) {
511 if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
512 ip->ip_dst.s_addr)
513 goto ours;
514 if (ip->ip_dst.s_addr == ia->ia_netbroadcast.s_addr)
515 goto ours;
516 }
517 }
518 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
519 struct in_multi *inm;
520 if (ip_mrouter) {
521 /*
522 * If we are acting as a multicast router, all
523 * incoming multicast packets are passed to the
524 * kernel-level multicast forwarding function.
525 * The packet is returned (relatively) intact; if
526 * ip_mforward() returns a non-zero value, the packet
527 * must be discarded, else it may be accepted below.
528 */
529 if (ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) {
530 ipstat.ips_cantforward++;
531 m_freem(m);
532 return;
533 }
534
535 /*
536 * The process-level routing demon needs to receive
537 * all multicast IGMP packets, whether or not this
538 * host belongs to their destination groups.
539 */
540 if (ip->ip_p == IPPROTO_IGMP)
541 goto ours;
542 ipstat.ips_forward++;
543 }
544 /*
545 * See if we belong to the destination multicast group on the
546 * arrival interface.
547 */
548 IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
549 if (inm == NULL) {
550 ipstat.ips_notmember++;
551 m_freem(m);
552 return;
553 }
554 goto ours;
555 }
556 if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
557 goto ours;
558 if (ip->ip_dst.s_addr == INADDR_ANY)
559 goto ours;
560
561#if defined(NFAITH) && 0 < NFAITH
562 /*
563 * FAITH(Firewall Aided Internet Translator)
564 */
565 if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
566 if (ip_keepfaith) {
567 if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP)
568 goto ours;
569 }
570 m_freem(m);
571 return;
572 }
573#endif
574 /*
575 * Not for us; forward if possible and desirable.
576 */
577 if (ipforwarding == 0) {
578 ipstat.ips_cantforward++;
579 m_freem(m);
580 } else
581 ip_forward(m, 0);
582#ifdef IPFIREWALL_FORWARD
583 ip_fw_fwd_addr = NULL;
584#endif
585 return;
586
587ours:
588 /* Count the packet in the ip address stats */
589 if (ia != NULL) {
590 ia->ia_ifa.if_ipackets++;
591 ia->ia_ifa.if_ibytes += m->m_pkthdr.len;
592 }
593
594 /*
595 * If offset or IP_MF are set, must reassemble.
596 * Otherwise, nothing need be done.
597 * (We could look in the reassembly queue to see
598 * if the packet was previously fragmented,
599 * but it's not worth the time; just let them time out.)
600 */
601 if (ip->ip_off & (IP_MF | IP_OFFMASK | IP_RF)) {
602
603#if 0 /*
604 * Reassembly should be able to treat a mbuf cluster, for later
605 * operation of contiguous protocol headers on the cluster. (KAME)
606 */
607 if (m->m_flags & M_EXT) { /* XXX */
608 if ((m = m_pullup(m, hlen)) == 0) {
609 ipstat.ips_toosmall++;
610#ifdef IPFIREWALL_FORWARD
611 ip_fw_fwd_addr = NULL;
612#endif
613 return;
614 }
615 ip = mtod(m, struct ip *);
616 }
617#endif
618 sum = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
619 /*
620 * Look for queue of fragments
621 * of this datagram.
622 */
623 for (fp = ipq[sum].next; fp != &ipq[sum]; fp = fp->next)
624 if (ip->ip_id == fp->ipq_id &&
625 ip->ip_src.s_addr == fp->ipq_src.s_addr &&
626 ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
627 ip->ip_p == fp->ipq_p)
628 goto found;
629
630 fp = 0;
631
632 /* check if there's a place for the new queue */
633 if (nipq > maxnipq) {
634 /*
635 * drop something from the tail of the current queue
636 * before proceeding further
637 */
638 if (ipq[sum].prev == &ipq[sum]) { /* gak */
639 for (i = 0; i < IPREASS_NHASH; i++) {
640 if (ipq[i].prev != &ipq[i]) {
641 ip_freef(ipq[i].prev);
642 break;
643 }
644 }
645 } else
646 ip_freef(ipq[sum].prev);
647 }
648found:
649 /*
650 * Adjust ip_len to not reflect header,
651 * set ip_mff if more fragments are expected,
652 * convert offset of this to bytes.
653 */
654 ip->ip_len -= hlen;
655 mff = (ip->ip_off & IP_MF) != 0;
656 if (mff) {
657 /*
658 * Make sure that fragments have a data length
659 * that's a non-zero multiple of 8 bytes.
660 */
661 if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
662 ipstat.ips_toosmall++; /* XXX */
663 goto bad;
664 }
665 m->m_flags |= M_FRAG;
666 }
667 ip->ip_off <<= 3;
668
669 /*
670 * If datagram marked as having more fragments
671 * or if this is not the first fragment,
672 * attempt reassembly; if it succeeds, proceed.
673 */
674 if (mff || ip->ip_off) {
675 ipstat.ips_fragments++;
676 m->m_pkthdr.header = ip;
677#ifdef IPDIVERT
678 m = ip_reass(m,
679 fp, &ipq[sum], &divert_info, &divert_cookie);
680#else
681 m = ip_reass(m, fp, &ipq[sum]);
682#endif
683 if (m == 0) {
684#ifdef IPFIREWALL_FORWARD
685 ip_fw_fwd_addr = NULL;
686#endif
687 return;
688 }
35 */
36
37#define _IP_VHL
38
39#include "opt_bootp.h"
40#include "opt_ipfw.h"
41#include "opt_ipdn.h"
42#include "opt_ipdivert.h"
43#include "opt_ipfilter.h"
44#include "opt_ipstealth.h"
45#include "opt_ipsec.h"
46#include "opt_pfil_hooks.h"
47
48#include <stddef.h>
49
50#include <sys/param.h>
51#include <sys/systm.h>
52#include <sys/mbuf.h>
53#include <sys/malloc.h>
54#include <sys/domain.h>
55#include <sys/protosw.h>
56#include <sys/socket.h>
57#include <sys/time.h>
58#include <sys/kernel.h>
59#include <sys/syslog.h>
60#include <sys/sysctl.h>
61
62#include <net/pfil.h>
63#include <net/if.h>
64#include <net/if_var.h>
65#include <net/if_dl.h>
66#include <net/route.h>
67#include <net/netisr.h>
68#include <net/intrq.h>
69
70#include <netinet/in.h>
71#include <netinet/in_systm.h>
72#include <netinet/in_var.h>
73#include <netinet/ip.h>
74#include <netinet/in_pcb.h>
75#include <netinet/ip_var.h>
76#include <netinet/ip_icmp.h>
77#include <machine/in_cksum.h>
78
79#include <netinet/ipprotosw.h>
80
81#include <sys/socketvar.h>
82
83#include <netinet/ip_fw.h>
84
85#ifdef IPSEC
86#include <netinet6/ipsec.h>
87#include <netkey/key.h>
88#endif
89
90#include "faith.h"
91#if defined(NFAITH) && NFAITH > 0
92#include <net/if_types.h>
93#endif
94
95#ifdef DUMMYNET
96#include <netinet/ip_dummynet.h>
97#endif
98
/*
 * RSVP state.  ip_rsvp_on and ip_rsvpd are only defined here and have no
 * user in the visible code; rsvp_on is tested by ip_input() in the older
 * rendering of this file before it claims IPPROTO_RSVP packets.
 */
99int rsvp_on = 0;
100static int ip_rsvp_on;
101struct socket *ip_rsvpd;
102
/*
 * Run-time tunables.  Each SYSCTL_INT below attaches the variable defined
 * just before it to the _net_inet_ip node, together with an access flag
 * and a human-readable description string.
 */
103int ipforwarding = 0;
104SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW,
105 &ipforwarding, 0, "Enable IP forwarding between interfaces");
106
107static int ipsendredirects = 1; /* XXX */
108SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
109 &ipsendredirects, 0, "Enable sending IP redirects");
110
/*
 * NOTE(review): the variable is initialised from IPDEFTTL (a default),
 * while the description string calls it a maximum -- confirm the wording.
 */
111int ip_defttl = IPDEFTTL;
112SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
113 &ip_defttl, 0, "Maximum TTL on IP packets");
114
115static int ip_dosourceroute = 0;
116SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
117 &ip_dosourceroute, 0, "Enable forwarding source routed IP packets");
118
119static int ip_acceptsourceroute = 0;
120SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
121 CTLFLAG_RW, &ip_acceptsourceroute, 0,
122 "Enable accepting source routed IP packets");
123
124static int ip_keepfaith = 0;
125SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
126 &ip_keepfaith, 0,
127 "Enable packet capture for FAITH IPv4->IPv6 translater daemon");
128
/* Only defined when DIAGNOSTIC is set; no use is visible in this excerpt. */
129#ifdef DIAGNOSTIC
130static int ipprintfs = 0;
131#endif
132
/*
 * inetdomain and inetsw[] are defined elsewhere.  ip_init() walks the
 * domain's protocol switch and records indices into inetsw[] in
 * ip_protox[], which is indexed by IP protocol number.
 */
133extern struct domain inetdomain;
134extern struct ipprotosw inetsw[];
135u_char ip_protox[IPPROTO_MAX];
/* Copied into ipintrq.ifq_maxlen by ip_init(). */
136static int ipqmaxlen = IFQ_MAXLEN;
137struct in_ifaddrhead in_ifaddrhead; /* first inet address */
/*
 * ipintrq is not declared in this file; presumably it comes from
 * <net/intrq.h> -- TODO confirm.  Its length limit is writable and its
 * drop counter is read-only.
 */
138SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW,
139 &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue");
140SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD,
141 &ipintrq.ifq_drops, 0, "Number of packets dropped from the IP input queue");
142
/* Counters bumped throughout ip_input(); exported read-only as a struct. */
143struct ipstat ipstat;
144SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RD,
145 &ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)");
146
147/*
 * Packet reassembly stuff.
 *
 * Fragment queues hang off a table of IPREASS_NHASH (1 << 6) bucket heads.
 * IPREASS_HASH(x,y) takes bits 0-3 of x, places bits 8-11 of x above them,
 * XORs the result with y and masks it down to a bucket index.
 */
148#define IPREASS_NHASH_LOG2 6
149#define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2)
150#define IPREASS_HMASK (IPREASS_NHASH - 1)
151#define IPREASS_HASH(x,y) \
152 (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)
153
/* Bucket heads; ip_init() links each one to itself. */
154static struct ipq ipq[IPREASS_NHASH];
155static int nipq = 0; /* total # of reass queues */
/* Assigned by ip_init() as nmbclusters/4. */
156static int maxnipq;
/* Constant flag fixed at 1; its consumers are outside this excerpt. */
157const int ipintrq_present = 1;
158
/*
 * NOTE(review): ip_mtu is not declared anywhere in the visible source, so
 * this only compiles if both IPCTL_DEFMTU and ip_mtu are provided elsewhere.
 */
159#ifdef IPCTL_DEFMTU
160SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
161 &ip_mtu, 0, "Default MTU");
162#endif
163
/* Optional knob, registered with an empty description string. */
164#ifdef IPSTEALTH
165static int ipstealth = 0;
166SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
167 &ipstealth, 0, "");
168#endif
169
170
171/*
 * Firewall hooks.  ip_input() calls *ip_fw_chk_ptr only when fw_enable is
 * non-zero and the pointer is non-NULL; ip_fw_ctl_ptr has no user in the
 * visible code.
 */
172ip_fw_chk_t *ip_fw_chk_ptr;
173ip_fw_ctl_t *ip_fw_ctl_ptr;
174int fw_enable = 1 ;
175
/* Dummynet control hook; only present when DUMMYNET is configured. */
176#ifdef DUMMYNET
177ip_dn_ctl_t *ip_dn_ctl_ptr;
178#endif
179
180
181/*
182 * We need to save the IP options in case a protocol wants to respond
183 * to an incoming packet over the same route if the packet got here
184 * using IP source routing. This allows connection establishment and
185 * maintenance when the remote end is on a network that is not known
186 * to us.
 *
 * The route[] array holds MAX_IPOPTLEN / sizeof(struct in_addr) entries.
187 */
188static int ip_nhops = 0;
189static struct ip_srcrt {
190 struct in_addr dst; /* final destination */
191 char nop; /* one NOP to align */
192 char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */
193 struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
194} ip_srcrt;
195
/*
 * Forwarding address chosen by the firewall.  ip_input() passes its
 * address to the firewall check and, under IPFIREWALL_FORWARD, skips the
 * firewall entirely when the pointer is already set.
 */
196struct sockaddr_in *ip_fw_fwd_addr;
197
/*
 * Forward declarations of the file-local helpers.  With IPDIVERT defined,
 * ip_reass() takes two extra pointer arguments (u_int32_t * and
 * u_int16_t *).
 */
198static void save_rte __P((u_char *, struct in_addr));
199static int ip_dooptions __P((struct mbuf *));
200static void ip_forward __P((struct mbuf *, int));
201static void ip_freef __P((struct ipq *));
202#ifdef IPDIVERT
203static struct mbuf *ip_reass __P((struct mbuf *,
204 struct ipq *, struct ipq *, u_int32_t *, u_int16_t *));
205#else
206static struct mbuf *ip_reass __P((struct mbuf *, struct ipq *, struct ipq *));
207#endif
208static struct in_ifaddr *ip_rtaddr __P((struct in_addr));
209static void ipintr __P((void));
210
211/*
212 * IP initialization: fill in IP protocol switch table.
213 * All protocols not implemented in kernel go to raw IP protocol handler.
 *
 * Besides building ip_protox[], this initialises the interface address
 * list head and every reassembly hash bucket, sets the reassembly queue
 * limit, seeds ip_id, sets the input queue length and registers ipintr()
 * for NETISR_IP.  Takes no arguments and returns nothing; panics if no
 * raw IP protocol entry can be found.
214 */
215void
216ip_init()
217{
218 register struct ipprotosw *pr;
219 register int i;
220
221 TAILQ_INIT(&in_ifaddrhead);
 /* The raw IP entry is the fallback handler, so it has to exist. */
222 pr = (struct ipprotosw *)pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
223 if (pr == 0)
224 panic("ip_init");
 /* Start with every protocol number mapped to the raw entry's index. */
225 for (i = 0; i < IPPROTO_MAX; i++)
226 ip_protox[i] = pr - inetsw;
 /*
  * Then let each PF_INET entry with a non-zero, non-raw protocol
  * number claim its own slot.
  * NOTE(review): pr_protocol is used as an index without being
  * compared against IPPROTO_MAX, and the index is stored in a
  * u_char -- confirm both ranges are guaranteed by inetsw[].
  */
227 for (pr = (struct ipprotosw *)inetdomain.dom_protosw;
228 pr < (struct ipprotosw *)inetdomain.dom_protoswNPROTOSW; pr++)
229 if (pr->pr_domain->dom_family == PF_INET &&
230 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
231 ip_protox[pr->pr_protocol] = pr - inetsw;
232
 /* An empty bucket is a head whose next and prev point at itself. */
233 for (i = 0; i < IPREASS_NHASH; i++)
234 ipq[i].next = ipq[i].prev = &ipq[i];
235
 /* Reassembly queue limit: one quarter of nmbclusters. */
236 maxnipq = nmbclusters/4;
237
 /* Seed ip_id with the low 16 bits of time_second. */
238 ip_id = time_second & 0xffff;
239 ipintrq.ifq_maxlen = ipqmaxlen;
240
 /* Hand ipintr() to the netisr layer as the NETISR_IP handler. */
241 register_netisr(NETISR_IP, ipintr);
242}
243
/*
 * File-scope scratch objects.  ipaddr is pre-filled with its own size and
 * AF_INET in its first two members (presumably sin_len and sin_family --
 * TODO confirm against struct sockaddr_in).  Neither object is used in
 * the visible part of ip_input().
 */
244static struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
245static struct route ipforward_rt;
246
247/*
248 * Ip input routine. Checksum and byte swap header. If fragmented
249 * try to reassemble. Process options. Pass to next level.
250 */
251void
252ip_input(struct mbuf *m)
253{
254 struct ip *ip;
255 struct ipq *fp;
256 struct in_ifaddr *ia = NULL;
257 int i, hlen, mff;
258 u_short sum;
259 u_int16_t divert_cookie; /* firewall cookie */
260#ifdef IPDIVERT
261 u_int32_t divert_info = 0; /* packet divert/tee info */
262#endif
263 struct ip_fw_chain *rule = NULL;
264#ifdef PFIL_HOOKS
265 struct packet_filter_hook *pfh;
266 struct mbuf *m0;
267 int rv;
268#endif /* PFIL_HOOKS */
269
270#ifdef IPDIVERT
271 /* Get and reset firewall cookie */
272 divert_cookie = ip_divert_cookie;
273 ip_divert_cookie = 0;
274#else
275 divert_cookie = 0;
276#endif
277
278#if defined(IPFIREWALL) && defined(DUMMYNET)
279 /*
280 * dummynet packet are prepended a vestigial mbuf with
281 * m_type = MT_DUMMYNET and m_data pointing to the matching
282 * rule.
283 */
284 if (m->m_type == MT_DUMMYNET) {
285 rule = (struct ip_fw_chain *)(m->m_data) ;
286 m = m->m_next ;
287 ip = mtod(m, struct ip *);
288 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
289 goto iphack ;
290 } else
291 rule = NULL ;
292#endif
293
294#ifdef DIAGNOSTIC
295 if (m == NULL || (m->m_flags & M_PKTHDR) == 0)
296 panic("ip_input no HDR");
297#endif
298 ipstat.ips_total++;
299
300 if (m->m_pkthdr.len < sizeof(struct ip))
301 goto tooshort;
302
303 if (m->m_len < sizeof (struct ip) &&
304 (m = m_pullup(m, sizeof (struct ip))) == 0) {
305 ipstat.ips_toosmall++;
306 return;
307 }
308 ip = mtod(m, struct ip *);
309
310 if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
311 ipstat.ips_badvers++;
312 goto bad;
313 }
314
315 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
316 if (hlen < sizeof(struct ip)) { /* minimum header length */
317 ipstat.ips_badhlen++;
318 goto bad;
319 }
320 if (hlen > m->m_len) {
321 if ((m = m_pullup(m, hlen)) == 0) {
322 ipstat.ips_badhlen++;
323 return;
324 }
325 ip = mtod(m, struct ip *);
326 }
327 if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
328 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
329 } else {
330 if (hlen == sizeof(struct ip)) {
331 sum = in_cksum_hdr(ip);
332 } else {
333 sum = in_cksum(m, hlen);
334 }
335 }
336 if (sum) {
337 ipstat.ips_badsum++;
338 goto bad;
339 }
340
341 /*
342 * Convert fields to host representation.
343 */
344 NTOHS(ip->ip_len);
345 if (ip->ip_len < hlen) {
346 ipstat.ips_badlen++;
347 goto bad;
348 }
349 NTOHS(ip->ip_off);
350
351 /*
352 * Check that the amount of data in the buffers
353 * is as at least much as the IP header would have us expect.
354 * Trim mbufs if longer than we expect.
355 * Drop packet if shorter than we expect.
356 */
357 if (m->m_pkthdr.len < ip->ip_len) {
358tooshort:
359 ipstat.ips_tooshort++;
360 goto bad;
361 }
362 if (m->m_pkthdr.len > ip->ip_len) {
363 if (m->m_len == m->m_pkthdr.len) {
364 m->m_len = ip->ip_len;
365 m->m_pkthdr.len = ip->ip_len;
366 } else
367 m_adj(m, ip->ip_len - m->m_pkthdr.len);
368 }
369 /*
370 * IpHack's section.
371 * Right now when no processing on packet has done
372 * and it is still fresh out of network we do our black
373 * deals with it.
374 * - Firewall: deny/allow/divert
375 * - Xlate: translate packet's addr/port (NAT).
376 * - Pipe: pass pkt through dummynet.
377 * - Wrap: fake packet's addr/port <unimpl.>
378 * - Encapsulate: put it in another IP and send out. <unimp.>
379 */
380
381#if defined(IPFIREWALL) && defined(DUMMYNET)
382iphack:
383#endif
384
385#ifdef PFIL_HOOKS
386 /*
387 * Run through list of hooks for input packets. If there are any
388 * filters which require that additional packets in the flow are
389 * not fast-forwarded, they must clear the M_CANFASTFWD flag.
390 * Note that filters must _never_ set this flag, as another filter
391 * in the list may have previously cleared it.
392 */
393 m0 = m;
394 pfh = pfil_hook_get(PFIL_IN, &inetsw[ip_protox[IPPROTO_IP]].pr_pfh);
395 for (; pfh; pfh = pfh->pfil_link.tqe_next)
396 if (pfh->pfil_func) {
397 rv = pfh->pfil_func(ip, hlen,
398 m->m_pkthdr.rcvif, 0, &m0);
399 if (rv)
400 return;
401 m = m0;
402 if (m == NULL)
403 return;
404 ip = mtod(m, struct ip *);
405 }
406#endif /* PFIL_HOOKS */
407
408 if (fw_enable && ip_fw_chk_ptr) {
409#ifdef IPFIREWALL_FORWARD
410 /*
411 * If we've been forwarded from the output side, then
412 * skip the firewall a second time
413 */
414 if (ip_fw_fwd_addr)
415 goto ours;
416#endif /* IPFIREWALL_FORWARD */
417 /*
418 * See the comment in ip_output for the return values
419 * produced by the firewall.
420 */
421 i = (*ip_fw_chk_ptr)(&ip,
422 hlen, NULL, &divert_cookie, &m, &rule, &ip_fw_fwd_addr);
423 if (m == NULL) /* Packet discarded by firewall */
424 return;
425 if (i == 0 && ip_fw_fwd_addr == NULL) /* common case */
426 goto pass;
427#ifdef DUMMYNET
428 if ((i & IP_FW_PORT_DYNT_FLAG) != 0) {
429 /* Send packet to the appropriate pipe */
430 dummynet_io(i&0xffff,DN_TO_IP_IN,m,NULL,NULL,0, rule,
431 0);
432 return;
433 }
434#endif
435#ifdef IPDIVERT
436 if (i != 0 && (i & IP_FW_PORT_DYNT_FLAG) == 0) {
437 /* Divert or tee packet */
438 divert_info = i;
439 goto ours;
440 }
441#endif
442#ifdef IPFIREWALL_FORWARD
443 if (i == 0 && ip_fw_fwd_addr != NULL)
444 goto pass;
445#endif
446 /*
447 * if we get here, the packet must be dropped
448 */
449 m_freem(m);
450 return;
451 }
452pass:
453
454 /*
455 * Process options and, if not destined for us,
456 * ship it on. ip_dooptions returns 1 when an
457 * error was detected (causing an icmp message
458 * to be sent and the original packet to be freed).
459 */
460 ip_nhops = 0; /* for source routed packets */
461 if (hlen > sizeof (struct ip) && ip_dooptions(m)) {
462#ifdef IPFIREWALL_FORWARD
463 ip_fw_fwd_addr = NULL;
464#endif
465 return;
466 }
467
468 /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
469 * matter if it is destined to another node, or whether it is
470 * a multicast one, RSVP wants it! and prevents it from being forwarded
471 * anywhere else. Also checks if the rsvp daemon is running before
472 * grabbing the packet.
473 */
474 if (rsvp_on && ip->ip_p==IPPROTO_RSVP)
475 goto ours;
476
477 /*
478 * Check our list of addresses, to see if the packet is for us.
479 * If we don't have any addresses, assume any unicast packet
480 * we receive might be for us (and let the upper layers deal
481 * with it).
482 */
483 if (TAILQ_EMPTY(&in_ifaddrhead) &&
484 (m->m_flags & (M_MCAST|M_BCAST)) == 0)
485 goto ours;
486
487 for (ia = TAILQ_FIRST(&in_ifaddrhead); ia;
488 ia = TAILQ_NEXT(ia, ia_link)) {
489#define satosin(sa) ((struct sockaddr_in *)(sa))
490
491#ifdef BOOTP_COMPAT
492 if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY)
493 goto ours;
494#endif
495#ifdef IPFIREWALL_FORWARD
496 /*
497 * If the addr to forward to is one of ours, we pretend to
498 * be the destination for this packet.
499 */
500 if (ip_fw_fwd_addr == NULL) {
501 if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr)
502 goto ours;
503 } else if (IA_SIN(ia)->sin_addr.s_addr ==
504 ip_fw_fwd_addr->sin_addr.s_addr)
505 goto ours;
506#else
507 if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr)
508 goto ours;
509#endif
510 if (ia->ia_ifp && ia->ia_ifp->if_flags & IFF_BROADCAST) {
511 if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
512 ip->ip_dst.s_addr)
513 goto ours;
514 if (ip->ip_dst.s_addr == ia->ia_netbroadcast.s_addr)
515 goto ours;
516 }
517 }
518 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
519 struct in_multi *inm;
520 if (ip_mrouter) {
521 /*
522 * If we are acting as a multicast router, all
523 * incoming multicast packets are passed to the
524 * kernel-level multicast forwarding function.
525 * The packet is returned (relatively) intact; if
526 * ip_mforward() returns a non-zero value, the packet
527 * must be discarded, else it may be accepted below.
528 */
529 if (ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) {
530 ipstat.ips_cantforward++;
531 m_freem(m);
532 return;
533 }
534
535 /*
536 * The process-level routing demon needs to receive
537 * all multicast IGMP packets, whether or not this
538 * host belongs to their destination groups.
539 */
540 if (ip->ip_p == IPPROTO_IGMP)
541 goto ours;
542 ipstat.ips_forward++;
543 }
544 /*
545 * See if we belong to the destination multicast group on the
546 * arrival interface.
547 */
548 IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
549 if (inm == NULL) {
550 ipstat.ips_notmember++;
551 m_freem(m);
552 return;
553 }
554 goto ours;
555 }
556 if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
557 goto ours;
558 if (ip->ip_dst.s_addr == INADDR_ANY)
559 goto ours;
560
561#if defined(NFAITH) && 0 < NFAITH
562 /*
563 * FAITH(Firewall Aided Internet Translator)
564 */
565 if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
566 if (ip_keepfaith) {
567 if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP)
568 goto ours;
569 }
570 m_freem(m);
571 return;
572 }
573#endif
574 /*
575 * Not for us; forward if possible and desirable.
576 */
577 if (ipforwarding == 0) {
578 ipstat.ips_cantforward++;
579 m_freem(m);
580 } else
581 ip_forward(m, 0);
582#ifdef IPFIREWALL_FORWARD
583 ip_fw_fwd_addr = NULL;
584#endif
585 return;
586
587ours:
588 /* Count the packet in the ip address stats */
589 if (ia != NULL) {
590 ia->ia_ifa.if_ipackets++;
591 ia->ia_ifa.if_ibytes += m->m_pkthdr.len;
592 }
593
594 /*
595 * If offset or IP_MF are set, must reassemble.
596 * Otherwise, nothing need be done.
597 * (We could look in the reassembly queue to see
598 * if the packet was previously fragmented,
599 * but it's not worth the time; just let them time out.)
600 */
601 if (ip->ip_off & (IP_MF | IP_OFFMASK | IP_RF)) {
602
603#if 0 /*
604 * Reassembly should be able to treat a mbuf cluster, for later
605 * operation of contiguous protocol headers on the cluster. (KAME)
606 */
607 if (m->m_flags & M_EXT) { /* XXX */
608 if ((m = m_pullup(m, hlen)) == 0) {
609 ipstat.ips_toosmall++;
610#ifdef IPFIREWALL_FORWARD
611 ip_fw_fwd_addr = NULL;
612#endif
613 return;
614 }
615 ip = mtod(m, struct ip *);
616 }
617#endif
618 sum = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
619 /*
620 * Look for queue of fragments
621 * of this datagram.
622 */
623 for (fp = ipq[sum].next; fp != &ipq[sum]; fp = fp->next)
624 if (ip->ip_id == fp->ipq_id &&
625 ip->ip_src.s_addr == fp->ipq_src.s_addr &&
626 ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
627 ip->ip_p == fp->ipq_p)
628 goto found;
629
630 fp = 0;
631
632 /* check if there's a place for the new queue */
633 if (nipq > maxnipq) {
634 /*
635 * drop something from the tail of the current queue
636 * before proceeding further
637 */
638 if (ipq[sum].prev == &ipq[sum]) { /* gak */
639 for (i = 0; i < IPREASS_NHASH; i++) {
640 if (ipq[i].prev != &ipq[i]) {
641 ip_freef(ipq[i].prev);
642 break;
643 }
644 }
645 } else
646 ip_freef(ipq[sum].prev);
647 }
648found:
649 /*
650 * Adjust ip_len to not reflect header,
651 * set ip_mff if more fragments are expected,
652 * convert offset of this to bytes.
653 */
654 ip->ip_len -= hlen;
655 mff = (ip->ip_off & IP_MF) != 0;
656 if (mff) {
657 /*
658 * Make sure that fragments have a data length
659 * that's a non-zero multiple of 8 bytes.
660 */
661 if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
662 ipstat.ips_toosmall++; /* XXX */
663 goto bad;
664 }
665 m->m_flags |= M_FRAG;
666 }
667 ip->ip_off <<= 3;
668
669 /*
670 * If datagram marked as having more fragments
671 * or if this is not the first fragment,
672 * attempt reassembly; if it succeeds, proceed.
673 */
674 if (mff || ip->ip_off) {
675 ipstat.ips_fragments++;
676 m->m_pkthdr.header = ip;
677#ifdef IPDIVERT
678 m = ip_reass(m,
679 fp, &ipq[sum], &divert_info, &divert_cookie);
680#else
681 m = ip_reass(m, fp, &ipq[sum]);
682#endif
683 if (m == 0) {
684#ifdef IPFIREWALL_FORWARD
685 ip_fw_fwd_addr = NULL;
686#endif
687 return;
688 }
689 /* Get the length of the reassembled packets header */
690 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
691 ipstat.ips_reassembled++;
692 ip = mtod(m, struct ip *);
689 ipstat.ips_reassembled++;
690 ip = mtod(m, struct ip *);
691 /* Get the header length of the reassembled packet */
692 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
693#ifdef IPDIVERT
694 /* Restore original checksum before diverting packet */
695 if (divert_info != 0) {
696 ip->ip_len += hlen;
697 HTONS(ip->ip_len);
698 HTONS(ip->ip_off);
699 ip->ip_sum = 0;
700 ip->ip_sum = in_cksum_hdr(ip);
701 NTOHS(ip->ip_off);
702 NTOHS(ip->ip_len);
703 ip->ip_len -= hlen;
704 }
705#endif
706 } else
707 if (fp)
708 ip_freef(fp);
709 } else
710 ip->ip_len -= hlen;
711
712#ifdef IPDIVERT
713 /*
714 * Divert or tee packet to the divert protocol if required.
715 *
716 * If divert_info is zero then cookie should be too, so we shouldn't
717 * need to clear them here. Assume divert_packet() does so also.
718 */
719 if (divert_info != 0) {
720 struct mbuf *clone = NULL;
721
722 /* Clone packet if we're doing a 'tee' */
723 if ((divert_info & IP_FW_PORT_TEE_FLAG) != 0)
724 clone = m_dup(m, M_DONTWAIT);
725
726 /* Restore packet header fields to original values */
727 ip->ip_len += hlen;
728 HTONS(ip->ip_len);
729 HTONS(ip->ip_off);
730
731 /* Deliver packet to divert input routine */
732 ip_divert_cookie = divert_cookie;
733 divert_packet(m, 1, divert_info & 0xffff);
734 ipstat.ips_delivered++;
735
736 /* If 'tee', continue with original packet */
737 if (clone == NULL)
738 return;
739 m = clone;
740 ip = mtod(m, struct ip *);
741 }
742#endif
743
744 /*
745 * Switch out to protocol's input routine.
746 */
747 ipstat.ips_delivered++;
748 {
749 int off = hlen, nh = ip->ip_p;
750
751 (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, off, nh);
752#ifdef IPFIREWALL_FORWARD
753 ip_fw_fwd_addr = NULL; /* tcp needed it */
754#endif
755 return;
756 }
757bad:
758#ifdef IPFIREWALL_FORWARD
759 ip_fw_fwd_addr = NULL;
760#endif
761 m_freem(m);
762}
763
764/*
765 * IP software interrupt routine - to go away sometime soon
766 */
767static void
768ipintr(void)
769{
770 int s;
771 struct mbuf *m;
772
773 while(1) {
774 s = splimp();
775 IF_DEQUEUE(&ipintrq, m);
776 splx(s);
777 if (m == 0)
778 return;
779 ip_input(m);
780 }
781}
782
783/*
784 * Take incoming datagram fragment and try to reassemble it into
785 * whole datagram. If a chain for reassembly of this datagram already
786 * exists, then it is given as fp; otherwise have to make a chain.
787 *
788 * When IPDIVERT enabled, keep additional state with each packet that
789 * tells us if we need to divert or tee the packet we're building.
790 */
791
792static struct mbuf *
793#ifdef IPDIVERT
794ip_reass(m, fp, where, divinfo, divcookie)
795#else
796ip_reass(m, fp, where)
797#endif
798 register struct mbuf *m;
799 register struct ipq *fp;
800 struct ipq *where;
801#ifdef IPDIVERT
802 u_int32_t *divinfo;
803 u_int16_t *divcookie;
804#endif
805{
806 struct ip *ip = mtod(m, struct ip *);
807 register struct mbuf *p = 0, *q, *nq;
808 struct mbuf *t;
809 int hlen = IP_VHL_HL(ip->ip_vhl) << 2;
810 int i, next;
811
812 /*
813 * Presence of header sizes in mbufs
814 * would confuse code below.
815 */
816 m->m_data += hlen;
817 m->m_len -= hlen;
818
819 /*
820 * If first fragment to arrive, create a reassembly queue.
821 */
822 if (fp == 0) {
823 if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
824 goto dropfrag;
825 fp = mtod(t, struct ipq *);
826 insque(fp, where);
827 nipq++;
828 fp->ipq_ttl = IPFRAGTTL;
829 fp->ipq_p = ip->ip_p;
830 fp->ipq_id = ip->ip_id;
831 fp->ipq_src = ip->ip_src;
832 fp->ipq_dst = ip->ip_dst;
833 fp->ipq_frags = m;
834 m->m_nextpkt = NULL;
835#ifdef IPDIVERT
836 fp->ipq_div_info = 0;
837 fp->ipq_div_cookie = 0;
838#endif
839 goto inserted;
840 }
841
842#define GETIP(m) ((struct ip*)((m)->m_pkthdr.header))
843
844 /*
845 * Find a segment which begins after this one does.
846 */
847 for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
848 if (GETIP(q)->ip_off > ip->ip_off)
849 break;
850
851 /*
852 * If there is a preceding segment, it may provide some of
853 * our data already. If so, drop the data from the incoming
854 * segment. If it provides all of our data, drop us, otherwise
855 * stick new segment in the proper place.
856 *
857 * If some of the data is dropped from the the preceding
858 * segment, then it's checksum is invalidated.
859 */
860 if (p) {
861 i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off;
862 if (i > 0) {
863 if (i >= ip->ip_len)
864 goto dropfrag;
865 m_adj(m, i);
866 m->m_pkthdr.csum_flags = 0;
867 ip->ip_off += i;
868 ip->ip_len -= i;
869 }
870 m->m_nextpkt = p->m_nextpkt;
871 p->m_nextpkt = m;
872 } else {
873 m->m_nextpkt = fp->ipq_frags;
874 fp->ipq_frags = m;
875 }
876
877 /*
878 * While we overlap succeeding segments trim them or,
879 * if they are completely covered, dequeue them.
880 */
881 for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off;
882 q = nq) {
883 i = (ip->ip_off + ip->ip_len) -
884 GETIP(q)->ip_off;
885 if (i < GETIP(q)->ip_len) {
886 GETIP(q)->ip_len -= i;
887 GETIP(q)->ip_off += i;
888 m_adj(q, i);
889 q->m_pkthdr.csum_flags = 0;
890 break;
891 }
892 nq = q->m_nextpkt;
893 m->m_nextpkt = nq;
894 m_freem(q);
895 }
896
897inserted:
898
899#ifdef IPDIVERT
900 /*
901 * Transfer firewall instructions to the fragment structure.
902 * Any fragment diverting causes the whole packet to divert.
903 */
904 fp->ipq_div_info = *divinfo;
905 fp->ipq_div_cookie = *divcookie;
906 *divinfo = 0;
907 *divcookie = 0;
908#endif
909
910 /*
911 * Check for complete reassembly.
912 */
913 next = 0;
914 for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
915 if (GETIP(q)->ip_off != next)
916 return (0);
917 next += GETIP(q)->ip_len;
918 }
919 /* Make sure the last packet didn't have the IP_MF flag */
920 if (p->m_flags & M_FRAG)
921 return (0);
922
923 /*
924 * Reassembly is complete. Make sure the packet is a sane size.
925 */
926 q = fp->ipq_frags;
927 ip = GETIP(q);
928 if (next + (IP_VHL_HL(ip->ip_vhl) << 2) > IP_MAXPACKET) {
929 ipstat.ips_toolong++;
930 ip_freef(fp);
931 return (0);
932 }
933
934 /*
935 * Concatenate fragments.
936 */
937 m = q;
938 t = m->m_next;
939 m->m_next = 0;
940 m_cat(m, t);
941 nq = q->m_nextpkt;
942 q->m_nextpkt = 0;
943 for (q = nq; q != NULL; q = nq) {
944 nq = q->m_nextpkt;
945 q->m_nextpkt = NULL;
946 m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags;
947 m->m_pkthdr.csum_data += q->m_pkthdr.csum_data;
948 m_cat(m, q);
949 }
950
951#ifdef IPDIVERT
952 /*
953 * Extract firewall instructions from the fragment structure.
954 */
955 *divinfo = fp->ipq_div_info;
956 *divcookie = fp->ipq_div_cookie;
957#endif
958
959 /*
960 * Create header for new ip packet by
961 * modifying header of first packet;
962 * dequeue and discard fragment reassembly header.
963 * Make header visible.
964 */
965 ip->ip_len = next;
966 ip->ip_src = fp->ipq_src;
967 ip->ip_dst = fp->ipq_dst;
968 remque(fp);
969 nipq--;
970 (void) m_free(dtom(fp));
971 m->m_len += (IP_VHL_HL(ip->ip_vhl) << 2);
972 m->m_data -= (IP_VHL_HL(ip->ip_vhl) << 2);
973 /* some debugging cruft by sklower, below, will go away soon */
974 if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
975 register int plen = 0;
976 for (t = m; t; t = t->m_next)
977 plen += t->m_len;
978 m->m_pkthdr.len = plen;
979 }
980 return (m);
981
982dropfrag:
983#ifdef IPDIVERT
984 *divinfo = 0;
985 *divcookie = 0;
986#endif
987 ipstat.ips_fragdropped++;
988 m_freem(m);
989 return (0);
990
991#undef GETIP
992}
993
994/*
995 * Free a fragment reassembly header and all
996 * associated datagrams.
997 */
998static void
999ip_freef(fp)
1000 struct ipq *fp;
1001{
1002 register struct mbuf *q;
1003
1004 while (fp->ipq_frags) {
1005 q = fp->ipq_frags;
1006 fp->ipq_frags = q->m_nextpkt;
1007 m_freem(q);
1008 }
1009 remque(fp);
1010 (void) m_free(dtom(fp));
1011 nipq--;
1012}
1013
1014/*
1015 * IP timer processing;
1016 * if a timer expires on a reassembly
1017 * queue, discard it.
1018 */
1019void
1020ip_slowtimo()
1021{
1022 register struct ipq *fp;
1023 int s = splnet();
1024 int i;
1025
1026 for (i = 0; i < IPREASS_NHASH; i++) {
1027 fp = ipq[i].next;
1028 if (fp == 0)
1029 continue;
1030 while (fp != &ipq[i]) {
1031 --fp->ipq_ttl;
1032 fp = fp->next;
1033 if (fp->prev->ipq_ttl == 0) {
1034 ipstat.ips_fragtimeout++;
1035 ip_freef(fp->prev);
1036 }
1037 }
1038 }
1039 ipflow_slowtimo();
1040 splx(s);
1041}
1042
1043/*
1044 * Drain off all datagram fragments.
1045 */
1046void
1047ip_drain()
1048{
1049 int i;
1050
1051 for (i = 0; i < IPREASS_NHASH; i++) {
1052 while (ipq[i].next != &ipq[i]) {
1053 ipstat.ips_fragdropped++;
1054 ip_freef(ipq[i].next);
1055 }
1056 }
1057 in_rtqdrain();
1058}
1059
1060/*
1061 * Do option processing on a datagram,
1062 * possibly discarding it if bad options are encountered,
1063 * or forwarding it if source-routed.
1064 * Returns 1 if packet has been forwarded/freed,
1065 * 0 if the packet should be processed further.
1066 */
1067static int
1068ip_dooptions(m)
1069 struct mbuf *m;
1070{
1071 register struct ip *ip = mtod(m, struct ip *);
1072 register u_char *cp;
1073 register struct ip_timestamp *ipt;
1074 register struct in_ifaddr *ia;
1075 int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
1076 struct in_addr *sin, dst;
1077 n_time ntime;
1078
1079 dst = ip->ip_dst;
1080 cp = (u_char *)(ip + 1);
1081 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
1082 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1083 opt = cp[IPOPT_OPTVAL];
1084 if (opt == IPOPT_EOL)
1085 break;
1086 if (opt == IPOPT_NOP)
1087 optlen = 1;
1088 else {
1089 if (cnt < IPOPT_OLEN + sizeof(*cp)) {
1090 code = &cp[IPOPT_OLEN] - (u_char *)ip;
1091 goto bad;
1092 }
1093 optlen = cp[IPOPT_OLEN];
1094 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
1095 code = &cp[IPOPT_OLEN] - (u_char *)ip;
1096 goto bad;
1097 }
1098 }
1099 switch (opt) {
1100
1101 default:
1102 break;
1103
1104 /*
1105 * Source routing with record.
1106 * Find interface with current destination address.
1107 * If none on this machine then drop if strictly routed,
1108 * or do nothing if loosely routed.
1109 * Record interface address and bring up next address
1110 * component. If strictly routed make sure next
1111 * address is on directly accessible net.
1112 */
1113 case IPOPT_LSRR:
1114 case IPOPT_SSRR:
1115 if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1116 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1117 goto bad;
1118 }
1119 ipaddr.sin_addr = ip->ip_dst;
1120 ia = (struct in_ifaddr *)
1121 ifa_ifwithaddr((struct sockaddr *)&ipaddr);
1122 if (ia == 0) {
1123 if (opt == IPOPT_SSRR) {
1124 type = ICMP_UNREACH;
1125 code = ICMP_UNREACH_SRCFAIL;
1126 goto bad;
1127 }
1128 if (!ip_dosourceroute)
1129 goto nosourcerouting;
1130 /*
1131 * Loose routing, and not at next destination
1132 * yet; nothing to do except forward.
1133 */
1134 break;
1135 }
1136 off--; /* 0 origin */
1137 if (off > optlen - (int)sizeof(struct in_addr)) {
1138 /*
1139 * End of source route. Should be for us.
1140 */
1141 if (!ip_acceptsourceroute)
1142 goto nosourcerouting;
1143 save_rte(cp, ip->ip_src);
1144 break;
1145 }
1146
1147 if (!ip_dosourceroute) {
1148 if (ipforwarding) {
1149 char buf[16]; /* aaa.bbb.ccc.ddd\0 */
1150 /*
1151 * Acting as a router, so generate ICMP
1152 */
1153nosourcerouting:
1154 strcpy(buf, inet_ntoa(ip->ip_dst));
1155 log(LOG_WARNING,
1156 "attempted source route from %s to %s\n",
1157 inet_ntoa(ip->ip_src), buf);
1158 type = ICMP_UNREACH;
1159 code = ICMP_UNREACH_SRCFAIL;
1160 goto bad;
1161 } else {
1162 /*
1163 * Not acting as a router, so silently drop.
1164 */
1165 ipstat.ips_cantforward++;
1166 m_freem(m);
1167 return (1);
1168 }
1169 }
1170
1171 /*
1172 * locate outgoing interface
1173 */
1174 (void)memcpy(&ipaddr.sin_addr, cp + off,
1175 sizeof(ipaddr.sin_addr));
1176
1177 if (opt == IPOPT_SSRR) {
1178#define INA struct in_ifaddr *
1179#define SA struct sockaddr *
1180 if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
1181 ia = (INA)ifa_ifwithnet((SA)&ipaddr);
1182 } else
1183 ia = ip_rtaddr(ipaddr.sin_addr);
1184 if (ia == 0) {
1185 type = ICMP_UNREACH;
1186 code = ICMP_UNREACH_SRCFAIL;
1187 goto bad;
1188 }
1189 ip->ip_dst = ipaddr.sin_addr;
1190 (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
1191 sizeof(struct in_addr));
1192 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1193 /*
1194 * Let ip_intr's mcast routing check handle mcast pkts
1195 */
1196 forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
1197 break;
1198
1199 case IPOPT_RR:
1200 if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
1201 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1202 goto bad;
1203 }
1204 if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1205 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1206 goto bad;
1207 }
1208 /*
1209 * If no space remains, ignore.
1210 */
1211 off--; /* 0 origin */
1212 if (off > optlen - (int)sizeof(struct in_addr))
1213 break;
1214 (void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
1215 sizeof(ipaddr.sin_addr));
1216 /*
1217 * locate outgoing interface; if we're the destination,
1218 * use the incoming interface (should be same).
1219 */
1220 if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
1221 (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
1222 type = ICMP_UNREACH;
1223 code = ICMP_UNREACH_HOST;
1224 goto bad;
1225 }
1226 (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
1227 sizeof(struct in_addr));
1228 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1229 break;
1230
1231 case IPOPT_TS:
1232 code = cp - (u_char *)ip;
1233 ipt = (struct ip_timestamp *)cp;
1234 if (ipt->ipt_len < 5)
1235 goto bad;
1236 if (ipt->ipt_ptr >
1237 ipt->ipt_len - (int)sizeof(int32_t)) {
1238 if (++ipt->ipt_oflw == 0)
1239 goto bad;
1240 break;
1241 }
1242 sin = (struct in_addr *)(cp + ipt->ipt_ptr - 1);
1243 switch (ipt->ipt_flg) {
1244
1245 case IPOPT_TS_TSONLY:
1246 break;
1247
1248 case IPOPT_TS_TSANDADDR:
1249 if (ipt->ipt_ptr - 1 + sizeof(n_time) +
1250 sizeof(struct in_addr) > ipt->ipt_len)
1251 goto bad;
1252 ipaddr.sin_addr = dst;
1253 ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
1254 m->m_pkthdr.rcvif);
1255 if (ia == 0)
1256 continue;
1257 (void)memcpy(sin, &IA_SIN(ia)->sin_addr,
1258 sizeof(struct in_addr));
1259 ipt->ipt_ptr += sizeof(struct in_addr);
1260 break;
1261
1262 case IPOPT_TS_PRESPEC:
1263 if (ipt->ipt_ptr - 1 + sizeof(n_time) +
1264 sizeof(struct in_addr) > ipt->ipt_len)
1265 goto bad;
1266 (void)memcpy(&ipaddr.sin_addr, sin,
1267 sizeof(struct in_addr));
1268 if (ifa_ifwithaddr((SA)&ipaddr) == 0)
1269 continue;
1270 ipt->ipt_ptr += sizeof(struct in_addr);
1271 break;
1272
1273 default:
1274 goto bad;
1275 }
1276 ntime = iptime();
1277 (void)memcpy(cp + ipt->ipt_ptr - 1, &ntime,
1278 sizeof(n_time));
1279 ipt->ipt_ptr += sizeof(n_time);
1280 }
1281 }
1282 if (forward && ipforwarding) {
1283 ip_forward(m, 1);
1284 return (1);
1285 }
1286 return (0);
1287bad:
1288 icmp_error(m, type, code, 0, 0);
1289 ipstat.ips_badoptions++;
1290 return (1);
1291}
1292
1293/*
1294 * Given address of next destination (final or next hop),
1295 * return internet address info of interface to be used to get there.
1296 */
1297static struct in_ifaddr *
1298ip_rtaddr(dst)
1299 struct in_addr dst;
1300{
1301 register struct sockaddr_in *sin;
1302
1303 sin = (struct sockaddr_in *) &ipforward_rt.ro_dst;
1304
1305 if (ipforward_rt.ro_rt == 0 || dst.s_addr != sin->sin_addr.s_addr) {
1306 if (ipforward_rt.ro_rt) {
1307 RTFREE(ipforward_rt.ro_rt);
1308 ipforward_rt.ro_rt = 0;
1309 }
1310 sin->sin_family = AF_INET;
1311 sin->sin_len = sizeof(*sin);
1312 sin->sin_addr = dst;
1313
1314 rtalloc_ign(&ipforward_rt, RTF_PRCLONING);
1315 }
1316 if (ipforward_rt.ro_rt == 0)
1317 return ((struct in_ifaddr *)0);
1318 return ((struct in_ifaddr *) ipforward_rt.ro_rt->rt_ifa);
1319}
1320
1321/*
1322 * Save incoming source route for use in replies,
1323 * to be picked up later by ip_srcroute if the receiver is interested.
1324 */
1325void
1326save_rte(option, dst)
1327 u_char *option;
1328 struct in_addr dst;
1329{
1330 unsigned olen;
1331
1332 olen = option[IPOPT_OLEN];
1333#ifdef DIAGNOSTIC
1334 if (ipprintfs)
1335 printf("save_rte: olen %d\n", olen);
1336#endif
1337 if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
1338 return;
1339 bcopy(option, ip_srcrt.srcopt, olen);
1340 ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
1341 ip_srcrt.dst = dst;
1342}
1343
1344/*
1345 * Retrieve incoming source route for use in replies,
1346 * in the same form used by setsockopt.
1347 * The first hop is placed before the options, will be removed later.
1348 */
1349struct mbuf *
1350ip_srcroute()
1351{
1352 register struct in_addr *p, *q;
1353 register struct mbuf *m;
1354
1355 if (ip_nhops == 0)
1356 return ((struct mbuf *)0);
1357 m = m_get(M_DONTWAIT, MT_HEADER);
1358 if (m == 0)
1359 return ((struct mbuf *)0);
1360
1361#define OPTSIZ (sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
1362
1363 /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
1364 m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
1365 OPTSIZ;
1366#ifdef DIAGNOSTIC
1367 if (ipprintfs)
1368 printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
1369#endif
1370
1371 /*
1372 * First save first hop for return route
1373 */
1374 p = &ip_srcrt.route[ip_nhops - 1];
1375 *(mtod(m, struct in_addr *)) = *p--;
1376#ifdef DIAGNOSTIC
1377 if (ipprintfs)
1378 printf(" hops %lx", (u_long)ntohl(mtod(m, struct in_addr *)->s_addr));
1379#endif
1380
1381 /*
1382 * Copy option fields and padding (nop) to mbuf.
1383 */
1384 ip_srcrt.nop = IPOPT_NOP;
1385 ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
1386 (void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
1387 &ip_srcrt.nop, OPTSIZ);
1388 q = (struct in_addr *)(mtod(m, caddr_t) +
1389 sizeof(struct in_addr) + OPTSIZ);
1390#undef OPTSIZ
1391 /*
1392 * Record return path as an IP source route,
1393 * reversing the path (pointers are now aligned).
1394 */
1395 while (p >= ip_srcrt.route) {
1396#ifdef DIAGNOSTIC
1397 if (ipprintfs)
1398 printf(" %lx", (u_long)ntohl(q->s_addr));
1399#endif
1400 *q++ = *p--;
1401 }
1402 /*
1403 * Last hop goes to final destination.
1404 */
1405 *q = ip_srcrt.dst;
1406#ifdef DIAGNOSTIC
1407 if (ipprintfs)
1408 printf(" %lx\n", (u_long)ntohl(q->s_addr));
1409#endif
1410 return (m);
1411}
1412
1413/*
1414 * Strip out IP options, at higher
1415 * level protocol in the kernel.
1416 * Second argument is buffer to which options
1417 * will be moved, and return value is their length.
1418 * XXX should be deleted; last arg currently ignored.
1419 */
1420void
1421ip_stripoptions(m, mopt)
1422 register struct mbuf *m;
1423 struct mbuf *mopt;
1424{
1425 register int i;
1426 struct ip *ip = mtod(m, struct ip *);
1427 register caddr_t opts;
1428 int olen;
1429
1430 olen = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
1431 opts = (caddr_t)(ip + 1);
1432 i = m->m_len - (sizeof (struct ip) + olen);
1433 bcopy(opts + olen, opts, (unsigned)i);
1434 m->m_len -= olen;
1435 if (m->m_flags & M_PKTHDR)
1436 m->m_pkthdr.len -= olen;
1437 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, sizeof(struct ip) >> 2);
1438}
1439
1440u_char inetctlerrmap[PRC_NCMDS] = {
1441 0, 0, 0, 0,
1442 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH,
1443 EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED,
1444 EMSGSIZE, EHOSTUNREACH, 0, 0,
1445 0, 0, 0, 0,
1446 ENOPROTOOPT
1447};
1448
1449/*
1450 * Forward a packet. If some error occurs return the sender
1451 * an icmp packet. Note we can't always generate a meaningful
1452 * icmp message because icmp doesn't have a large enough repertoire
1453 * of codes and types.
1454 *
1455 * If not forwarding, just drop the packet. This could be confusing
1456 * if ipforwarding was zero but some routing protocol was advancing
1457 * us as a gateway to somewhere. However, we must let the routing
1458 * protocol deal with that.
1459 *
1460 * The srcrt parameter indicates whether the packet is being forwarded
1461 * via a source route.
1462 */
1463static void
1464ip_forward(m, srcrt)
1465 struct mbuf *m;
1466 int srcrt;
1467{
1468 register struct ip *ip = mtod(m, struct ip *);
1469 register struct sockaddr_in *sin;
1470 register struct rtentry *rt;
1471 int error, type = 0, code = 0;
1472 struct mbuf *mcopy;
1473 n_long dest;
1474 struct ifnet *destifp;
1475#ifdef IPSEC
1476 struct ifnet dummyifp;
1477#endif
1478
1479 dest = 0;
1480#ifdef DIAGNOSTIC
1481 if (ipprintfs)
1482 printf("forward: src %lx dst %lx ttl %x\n",
1483 (u_long)ip->ip_src.s_addr, (u_long)ip->ip_dst.s_addr,
1484 ip->ip_ttl);
1485#endif
1486
1487
1488 if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
1489 ipstat.ips_cantforward++;
1490 m_freem(m);
1491 return;
1492 }
1493#ifdef IPSTEALTH
1494 if (!ipstealth) {
1495#endif
1496 if (ip->ip_ttl <= IPTTLDEC) {
1497 icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
1498 dest, 0);
1499 return;
1500 }
1501#ifdef IPSTEALTH
1502 }
1503#endif
1504
1505 sin = (struct sockaddr_in *)&ipforward_rt.ro_dst;
1506 if ((rt = ipforward_rt.ro_rt) == 0 ||
1507 ip->ip_dst.s_addr != sin->sin_addr.s_addr) {
1508 if (ipforward_rt.ro_rt) {
1509 RTFREE(ipforward_rt.ro_rt);
1510 ipforward_rt.ro_rt = 0;
1511 }
1512 sin->sin_family = AF_INET;
1513 sin->sin_len = sizeof(*sin);
1514 sin->sin_addr = ip->ip_dst;
1515
1516 rtalloc_ign(&ipforward_rt, RTF_PRCLONING);
1517 if (ipforward_rt.ro_rt == 0) {
1518 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
1519 return;
1520 }
1521 rt = ipforward_rt.ro_rt;
1522 }
1523
1524 /*
1525 * Save at most 64 bytes of the packet in case
1526 * we need to generate an ICMP message to the src.
1527 */
1528 mcopy = m_copy(m, 0, imin((int)ip->ip_len, 64));
1529 if (mcopy && (mcopy->m_flags & M_EXT))
1530 m_copydata(mcopy, 0, sizeof(struct ip), mtod(mcopy, caddr_t));
1531
1532#ifdef IPSTEALTH
1533 if (!ipstealth) {
1534#endif
1535 ip->ip_ttl -= IPTTLDEC;
1536#ifdef IPSTEALTH
1537 }
1538#endif
1539
1540 /*
1541 * If forwarding packet using same interface that it came in on,
1542 * perhaps should send a redirect to sender to shortcut a hop.
1543 * Only send redirect if source is sending directly to us,
1544 * and if packet was not source routed (or has any options).
1545 * Also, don't send redirect if forwarding using a default route
1546 * or a route modified by a redirect.
1547 */
1548#define satosin(sa) ((struct sockaddr_in *)(sa))
1549 if (rt->rt_ifp == m->m_pkthdr.rcvif &&
1550 (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
1551 satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
1552 ipsendredirects && !srcrt) {
1553#define RTA(rt) ((struct in_ifaddr *)(rt->rt_ifa))
1554 u_long src = ntohl(ip->ip_src.s_addr);
1555
1556 if (RTA(rt) &&
1557 (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
1558 if (rt->rt_flags & RTF_GATEWAY)
1559 dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
1560 else
1561 dest = ip->ip_dst.s_addr;
1562 /* Router requirements says to only send host redirects */
1563 type = ICMP_REDIRECT;
1564 code = ICMP_REDIRECT_HOST;
1565#ifdef DIAGNOSTIC
1566 if (ipprintfs)
1567 printf("redirect (%d) to %lx\n", code, (u_long)dest);
1568#endif
1569 }
1570 }
1571
1572 error = ip_output(m, (struct mbuf *)0, &ipforward_rt,
1573 IP_FORWARDING, 0);
1574 if (error)
1575 ipstat.ips_cantforward++;
1576 else {
1577 ipstat.ips_forward++;
1578 if (type)
1579 ipstat.ips_redirectsent++;
1580 else {
1581 if (mcopy) {
1582 ipflow_create(&ipforward_rt, mcopy);
1583 m_freem(mcopy);
1584 }
1585 return;
1586 }
1587 }
1588 if (mcopy == NULL)
1589 return;
1590 destifp = NULL;
1591
1592 switch (error) {
1593
1594 case 0: /* forwarded, but need redirect */
1595 /* type, code set above */
1596 break;
1597
1598 case ENETUNREACH: /* shouldn't happen, checked above */
1599 case EHOSTUNREACH:
1600 case ENETDOWN:
1601 case EHOSTDOWN:
1602 default:
1603 type = ICMP_UNREACH;
1604 code = ICMP_UNREACH_HOST;
1605 break;
1606
1607 case EMSGSIZE:
1608 type = ICMP_UNREACH;
1609 code = ICMP_UNREACH_NEEDFRAG;
1610#ifndef IPSEC
1611 if (ipforward_rt.ro_rt)
1612 destifp = ipforward_rt.ro_rt->rt_ifp;
1613#else
1614 /*
1615 * If the packet is routed over IPsec tunnel, tell the
1616 * originator the tunnel MTU.
1617 * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
1618 * XXX quickhack!!!
1619 */
1620 if (ipforward_rt.ro_rt) {
1621 struct secpolicy *sp = NULL;
1622 int ipsecerror;
1623 int ipsechdr;
1624 struct route *ro;
1625
1626 sp = ipsec4_getpolicybyaddr(mcopy,
1627 IPSEC_DIR_OUTBOUND,
1628 IP_FORWARDING,
1629 &ipsecerror);
1630
1631 if (sp == NULL)
1632 destifp = ipforward_rt.ro_rt->rt_ifp;
1633 else {
1634 /* count IPsec header size */
1635 ipsechdr = ipsec4_hdrsiz(mcopy,
1636 IPSEC_DIR_OUTBOUND,
1637 NULL);
1638
1639 /*
1640 * find the correct route for outer IPv4
1641 * header, compute tunnel MTU.
1642 *
1643 * XXX BUG ALERT
1644 * The "dummyifp" code relies upon the fact
1645 * that icmp_error() touches only ifp->if_mtu.
1646 */
1647 /*XXX*/
1648 destifp = NULL;
1649 if (sp->req != NULL
1650 && sp->req->sav != NULL
1651 && sp->req->sav->sah != NULL) {
1652 ro = &sp->req->sav->sah->sa_route;
1653 if (ro->ro_rt && ro->ro_rt->rt_ifp) {
1654 dummyifp.if_mtu =
1655 ro->ro_rt->rt_ifp->if_mtu;
1656 dummyifp.if_mtu -= ipsechdr;
1657 destifp = &dummyifp;
1658 }
1659 }
1660
1661 key_freesp(sp);
1662 }
1663 }
1664#endif /*IPSEC*/
1665 ipstat.ips_cantfrag++;
1666 break;
1667
1668 case ENOBUFS:
1669 type = ICMP_SOURCEQUENCH;
1670 code = 0;
1671 break;
1672
1673 case EACCES: /* ipfw denied packet */
1674 m_freem(mcopy);
1675 return;
1676 }
1677 if (mcopy->m_flags & M_EXT)
1678 m_copyback(mcopy, 0, sizeof(struct ip), mtod(mcopy, caddr_t));
1679 icmp_error(mcopy, type, code, dest, destifp);
1680}
1681
1682void
1683ip_savecontrol(inp, mp, ip, m)
1684 register struct inpcb *inp;
1685 register struct mbuf **mp;
1686 register struct ip *ip;
1687 register struct mbuf *m;
1688{
1689 if (inp->inp_socket->so_options & SO_TIMESTAMP) {
1690 struct timeval tv;
1691
1692 microtime(&tv);
1693 *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
1694 SCM_TIMESTAMP, SOL_SOCKET);
1695 if (*mp)
1696 mp = &(*mp)->m_next;
1697 }
1698 if (inp->inp_flags & INP_RECVDSTADDR) {
1699 *mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
1700 sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
1701 if (*mp)
1702 mp = &(*mp)->m_next;
1703 }
1704#ifdef notyet
1705 /* XXX
1706 * Moving these out of udp_input() made them even more broken
1707 * than they already were.
1708 */
1709 /* options were tossed already */
1710 if (inp->inp_flags & INP_RECVOPTS) {
1711 *mp = sbcreatecontrol((caddr_t) opts_deleted_above,
1712 sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
1713 if (*mp)
1714 mp = &(*mp)->m_next;
1715 }
1716 /* ip_srcroute doesn't do what we want here, need to fix */
1717 if (inp->inp_flags & INP_RECVRETOPTS) {
1718 *mp = sbcreatecontrol((caddr_t) ip_srcroute(),
1719 sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
1720 if (*mp)
1721 mp = &(*mp)->m_next;
1722 }
1723#endif
1724 if (inp->inp_flags & INP_RECVIF) {
1725 struct ifnet *ifp;
1726 struct sdlbuf {
1727 struct sockaddr_dl sdl;
1728 u_char pad[32];
1729 } sdlbuf;
1730 struct sockaddr_dl *sdp;
1731 struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
1732
1733 if (((ifp = m->m_pkthdr.rcvif))
1734 && ( ifp->if_index && (ifp->if_index <= if_index))) {
1735 sdp = (struct sockaddr_dl *)(ifnet_addrs
1736 [ifp->if_index - 1]->ifa_addr);
1737 /*
1738 * Change our mind and don't try copy.
1739 */
1740 if ((sdp->sdl_family != AF_LINK)
1741 || (sdp->sdl_len > sizeof(sdlbuf))) {
1742 goto makedummy;
1743 }
1744 bcopy(sdp, sdl2, sdp->sdl_len);
1745 } else {
1746makedummy:
1747 sdl2->sdl_len
1748 = offsetof(struct sockaddr_dl, sdl_data[0]);
1749 sdl2->sdl_family = AF_LINK;
1750 sdl2->sdl_index = 0;
1751 sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
1752 }
1753 *mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len,
1754 IP_RECVIF, IPPROTO_IP);
1755 if (*mp)
1756 mp = &(*mp)->m_next;
1757 }
1758}
1759
1760int
1761ip_rsvp_init(struct socket *so)
1762{
1763 if (so->so_type != SOCK_RAW ||
1764 so->so_proto->pr_protocol != IPPROTO_RSVP)
1765 return EOPNOTSUPP;
1766
1767 if (ip_rsvpd != NULL)
1768 return EADDRINUSE;
1769
1770 ip_rsvpd = so;
1771 /*
1772 * This may seem silly, but we need to be sure we don't over-increment
1773 * the RSVP counter, in case something slips up.
1774 */
1775 if (!ip_rsvp_on) {
1776 ip_rsvp_on = 1;
1777 rsvp_on++;
1778 }
1779
1780 return 0;
1781}
1782
1783int
1784ip_rsvp_done(void)
1785{
1786 ip_rsvpd = NULL;
1787 /*
1788 * This may seem silly, but we need to be sure we don't over-decrement
1789 * the RSVP counter, in case something slips up.
1790 */
1791 if (ip_rsvp_on) {
1792 ip_rsvp_on = 0;
1793 rsvp_on--;
1794 }
1795 return 0;
1796}
693#ifdef IPDIVERT
694 /* Restore original checksum before diverting packet */
695 if (divert_info != 0) {
696 ip->ip_len += hlen;
697 HTONS(ip->ip_len);
698 HTONS(ip->ip_off);
699 ip->ip_sum = 0;
700 ip->ip_sum = in_cksum_hdr(ip);
701 NTOHS(ip->ip_off);
702 NTOHS(ip->ip_len);
703 ip->ip_len -= hlen;
704 }
705#endif
706 } else
707 if (fp)
708 ip_freef(fp);
709 } else
710 ip->ip_len -= hlen;
711
712#ifdef IPDIVERT
713 /*
714 * Divert or tee packet to the divert protocol if required.
715 *
716 * If divert_info is zero then cookie should be too, so we shouldn't
717 * need to clear them here. Assume divert_packet() does so also.
718 */
719 if (divert_info != 0) {
720 struct mbuf *clone = NULL;
721
722 /* Clone packet if we're doing a 'tee' */
723 if ((divert_info & IP_FW_PORT_TEE_FLAG) != 0)
724 clone = m_dup(m, M_DONTWAIT);
725
726 /* Restore packet header fields to original values */
727 ip->ip_len += hlen;
728 HTONS(ip->ip_len);
729 HTONS(ip->ip_off);
730
731 /* Deliver packet to divert input routine */
732 ip_divert_cookie = divert_cookie;
733 divert_packet(m, 1, divert_info & 0xffff);
734 ipstat.ips_delivered++;
735
736 /* If 'tee', continue with original packet */
737 if (clone == NULL)
738 return;
739 m = clone;
740 ip = mtod(m, struct ip *);
741 }
742#endif
743
744 /*
745 * Switch out to protocol's input routine.
746 */
747 ipstat.ips_delivered++;
748 {
749 int off = hlen, nh = ip->ip_p;
750
751 (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, off, nh);
752#ifdef IPFIREWALL_FORWARD
753 ip_fw_fwd_addr = NULL; /* tcp needed it */
754#endif
755 return;
756 }
757bad:
758#ifdef IPFIREWALL_FORWARD
759 ip_fw_fwd_addr = NULL;
760#endif
761 m_freem(m);
762}
763
764/*
765 * IP software interrupt routine - to go away sometime soon
766 */
767static void
768ipintr(void)
769{
770 int s;
771 struct mbuf *m;
772
773 while(1) {
774 s = splimp();
775 IF_DEQUEUE(&ipintrq, m);
776 splx(s);
777 if (m == 0)
778 return;
779 ip_input(m);
780 }
781}
782
783/*
784 * Take incoming datagram fragment and try to reassemble it into
785 * whole datagram. If a chain for reassembly of this datagram already
786 * exists, then it is given as fp; otherwise have to make a chain.
787 *
788 * When IPDIVERT enabled, keep additional state with each packet that
789 * tells us if we need to divert or tee the packet we're building.
790 */
791
/*
 * Arguments:
 *	m	the arriving fragment; its IP header sits at the front of
 *		the mbuf on entry and is stepped over below.
 *	fp	reassembly queue the fragment belongs to, or 0 when none
 *		exists yet (one is then allocated here).
 *	where	queue element after which a newly created queue is
 *		linked with insque().
 *	divinfo, divcookie (IPDIVERT only)
 *		in/out divert state, see the transfer and extract steps.
 *
 * Returns the reassembled datagram once every fragment is present,
 * otherwise 0 (fragment queued, fragment dropped, or result too long).
 */
792static struct mbuf *
793#ifdef IPDIVERT
794ip_reass(m, fp, where, divinfo, divcookie)
795#else
796ip_reass(m, fp, where)
797#endif
798 register struct mbuf *m;
799 register struct ipq *fp;
800 struct ipq *where;
801#ifdef IPDIVERT
802 u_int32_t *divinfo;
803 u_int16_t *divcookie;
804#endif
805{
806 struct ip *ip = mtod(m, struct ip *);
807 register struct mbuf *p = 0, *q, *nq;
808 struct mbuf *t;
809 int hlen = IP_VHL_HL(ip->ip_vhl) << 2;
810 int i, next;
811
812 /*
813 * Presence of header sizes in mbufs
814 * would confuse code below.
815 */
816 m->m_data += hlen;
817 m->m_len -= hlen;
818
819 /*
820 * If first fragment to arrive, create a reassembly queue.
821 */
822 if (fp == 0) {
/* the queue header lives in the data area of its own mbuf */
823 if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
824 goto dropfrag;
825 fp = mtod(t, struct ipq *);
826 insque(fp, where);
827 nipq++;
828 fp->ipq_ttl = IPFRAGTTL;
829 fp->ipq_p = ip->ip_p;
830 fp->ipq_id = ip->ip_id;
831 fp->ipq_src = ip->ip_src;
832 fp->ipq_dst = ip->ip_dst;
833 fp->ipq_frags = m;
834 m->m_nextpkt = NULL;
835#ifdef IPDIVERT
836 fp->ipq_div_info = 0;
837 fp->ipq_div_cookie = 0;
838#endif
839 goto inserted;
840 }
841
/*
 * The IP header of a queued fragment is reached through
 * m_pkthdr.header.  NOTE(review): nothing in this function sets that
 * field, so the caller presumably does - confirm.
 */
842#define GETIP(m) ((struct ip*)((m)->m_pkthdr.header))
843
844 /*
845 * Find a segment which begins after this one does.
846 */
/* on exit p is the last fragment starting at or before us, q the next */
847 for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
848 if (GETIP(q)->ip_off > ip->ip_off)
849 break;
850
851 /*
852 * If there is a preceding segment, it may provide some of
853 * our data already. If so, drop the data from the incoming
854 * segment. If it provides all of our data, drop us, otherwise
855 * stick new segment in the proper place.
856 *
857 * If some of the data is dropped from the preceding
858 * segment, then its checksum is invalidated.
859 */
860 if (p) {
/* i = number of our leading bytes already covered by p */
861 i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off;
862 if (i > 0) {
863 if (i >= ip->ip_len)
864 goto dropfrag;
865 m_adj(m, i);
866 m->m_pkthdr.csum_flags = 0;
867 ip->ip_off += i;
868 ip->ip_len -= i;
869 }
870 m->m_nextpkt = p->m_nextpkt;
871 p->m_nextpkt = m;
872 } else {
873 m->m_nextpkt = fp->ipq_frags;
874 fp->ipq_frags = m;
875 }
876
877 /*
878 * While we overlap succeeding segments trim them or,
879 * if they are completely covered, dequeue them.
880 */
881 for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off;
882 q = nq) {
/* i = number of q's leading bytes that we already supply */
883 i = (ip->ip_off + ip->ip_len) -
884 GETIP(q)->ip_off;
885 if (i < GETIP(q)->ip_len) {
886 GETIP(q)->ip_len -= i;
887 GETIP(q)->ip_off += i;
888 m_adj(q, i);
889 q->m_pkthdr.csum_flags = 0;
890 break;
891 }
/* q is wholly covered: unlink it from behind m and free it */
892 nq = q->m_nextpkt;
893 m->m_nextpkt = nq;
894 m_freem(q);
895 }
896
897inserted:
898
899#ifdef IPDIVERT
900 /*
901 * Transfer firewall instructions to the fragment structure.
902 * Any fragment diverting causes the whole packet to divert.
903 */
/*
 * NOTE(review): the stores below are unconditional, so the values
 * carried by the most recently arrived fragment replace whatever an
 * earlier fragment left in the queue, including replacing a nonzero
 * value with zero.  That does not obviously match the sentence
 * above - confirm the intended policy.
 */
904 fp->ipq_div_info = *divinfo;
905 fp->ipq_div_cookie = *divcookie;
906 *divinfo = 0;
907 *divcookie = 0;
908#endif
909
910 /*
911 * Check for complete reassembly.
912 */
/* walk the queue; any gap between consecutive fragments means "not yet" */
913 next = 0;
914 for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
915 if (GETIP(q)->ip_off != next)
916 return (0);
917 next += GETIP(q)->ip_len;
918 }
919 /* Make sure the last packet didn't have the IP_MF flag */
920 if (p->m_flags & M_FRAG)
921 return (0);
922
923 /*
924 * Reassembly is complete. Make sure the packet is a sane size.
925 */
926 q = fp->ipq_frags;
927 ip = GETIP(q);
928 if (next + (IP_VHL_HL(ip->ip_vhl) << 2) > IP_MAXPACKET) {
929 ipstat.ips_toolong++;
930 ip_freef(fp);
931 return (0);
932 }
933
934 /*
935 * Concatenate fragments.
936 */
/* m becomes the head fragment; the remaining fragments are appended to it */
937 m = q;
938 t = m->m_next;
939 m->m_next = 0;
940 m_cat(m, t);
941 nq = q->m_nextpkt;
942 q->m_nextpkt = 0;
943 for (q = nq; q != NULL; q = nq) {
944 nq = q->m_nextpkt;
945 q->m_nextpkt = NULL;
/* keep only checksum flags common to all pieces; add up csum_data */
946 m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags;
947 m->m_pkthdr.csum_data += q->m_pkthdr.csum_data;
948 m_cat(m, q);
949 }
950
951#ifdef IPDIVERT
952 /*
953 * Extract firewall instructions from the fragment structure.
954 */
955 *divinfo = fp->ipq_div_info;
956 *divcookie = fp->ipq_div_cookie;
957#endif
958
959 /*
960 * Create header for new ip packet by
961 * modifying header of first packet;
962 * dequeue and discard fragment reassembly header.
963 * Make header visible.
964 */
965 ip->ip_len = next;
966 ip->ip_src = fp->ipq_src;
967 ip->ip_dst = fp->ipq_dst;
968 remque(fp);
969 nipq--;
970 (void) m_free(dtom(fp));
/* undo the m_data/m_len adjustment made on entry for the head fragment */
971 m->m_len += (IP_VHL_HL(ip->ip_vhl) << 2);
972 m->m_data -= (IP_VHL_HL(ip->ip_vhl) << 2);
973 /* some debugging cruft by sklower, below, will go away soon */
974 if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
/* recompute the packet-header length from the lengths of the chain */
975 register int plen = 0;
976 for (t = m; t; t = t->m_next)
977 plen += t->m_len;
978 m->m_pkthdr.len = plen;
979 }
980 return (m);
981
/* discard the incoming fragment only; an existing queue is left alone */
982dropfrag:
983#ifdef IPDIVERT
984 *divinfo = 0;
985 *divcookie = 0;
986#endif
987 ipstat.ips_fragdropped++;
988 m_freem(m);
989 return (0);
990
991#undef GETIP
992}
993
994/*
995 * Free a fragment reassembly header and all
996 * associated datagrams.
997 */
998static void
999ip_freef(fp)
1000 struct ipq *fp;
1001{
1002 register struct mbuf *q;
1003
1004 while (fp->ipq_frags) {
1005 q = fp->ipq_frags;
1006 fp->ipq_frags = q->m_nextpkt;
1007 m_freem(q);
1008 }
1009 remque(fp);
1010 (void) m_free(dtom(fp));
1011 nipq--;
1012}
1013
1014/*
1015 * IP timer processing;
1016 * if a timer expires on a reassembly
1017 * queue, discard it.
1018 */
1019void
1020ip_slowtimo()
1021{
1022 register struct ipq *fp;
1023 int s = splnet();
1024 int i;
1025
1026 for (i = 0; i < IPREASS_NHASH; i++) {
1027 fp = ipq[i].next;
1028 if (fp == 0)
1029 continue;
1030 while (fp != &ipq[i]) {
1031 --fp->ipq_ttl;
1032 fp = fp->next;
1033 if (fp->prev->ipq_ttl == 0) {
1034 ipstat.ips_fragtimeout++;
1035 ip_freef(fp->prev);
1036 }
1037 }
1038 }
1039 ipflow_slowtimo();
1040 splx(s);
1041}
1042
1043/*
1044 * Drain off all datagram fragments.
1045 */
1046void
1047ip_drain()
1048{
1049 int i;
1050
1051 for (i = 0; i < IPREASS_NHASH; i++) {
1052 while (ipq[i].next != &ipq[i]) {
1053 ipstat.ips_fragdropped++;
1054 ip_freef(ipq[i].next);
1055 }
1056 }
1057 in_rtqdrain();
1058}
1059
1060/*
1061 * Do option processing on a datagram,
1062 * possibly discarding it if bad options are encountered,
1063 * or forwarding it if source-routed.
1064 * Returns 1 if packet has been forwarded/freed,
1065 * 0 if the packet should be processed further.
1066 */
1067static int
1068ip_dooptions(m)
1069 struct mbuf *m;
1070{
1071 register struct ip *ip = mtod(m, struct ip *);
1072 register u_char *cp;
1073 register struct ip_timestamp *ipt;
1074 register struct in_ifaddr *ia;
1075 int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
1076 struct in_addr *sin, dst;
1077 n_time ntime;
1078
1079 dst = ip->ip_dst;
1080 cp = (u_char *)(ip + 1);
1081 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
1082 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1083 opt = cp[IPOPT_OPTVAL];
1084 if (opt == IPOPT_EOL)
1085 break;
1086 if (opt == IPOPT_NOP)
1087 optlen = 1;
1088 else {
1089 if (cnt < IPOPT_OLEN + sizeof(*cp)) {
1090 code = &cp[IPOPT_OLEN] - (u_char *)ip;
1091 goto bad;
1092 }
1093 optlen = cp[IPOPT_OLEN];
1094 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
1095 code = &cp[IPOPT_OLEN] - (u_char *)ip;
1096 goto bad;
1097 }
1098 }
1099 switch (opt) {
1100
1101 default:
1102 break;
1103
1104 /*
1105 * Source routing with record.
1106 * Find interface with current destination address.
1107 * If none on this machine then drop if strictly routed,
1108 * or do nothing if loosely routed.
1109 * Record interface address and bring up next address
1110 * component. If strictly routed make sure next
1111 * address is on directly accessible net.
1112 */
1113 case IPOPT_LSRR:
1114 case IPOPT_SSRR:
1115 if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1116 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1117 goto bad;
1118 }
1119 ipaddr.sin_addr = ip->ip_dst;
1120 ia = (struct in_ifaddr *)
1121 ifa_ifwithaddr((struct sockaddr *)&ipaddr);
1122 if (ia == 0) {
1123 if (opt == IPOPT_SSRR) {
1124 type = ICMP_UNREACH;
1125 code = ICMP_UNREACH_SRCFAIL;
1126 goto bad;
1127 }
1128 if (!ip_dosourceroute)
1129 goto nosourcerouting;
1130 /*
1131 * Loose routing, and not at next destination
1132 * yet; nothing to do except forward.
1133 */
1134 break;
1135 }
1136 off--; /* 0 origin */
1137 if (off > optlen - (int)sizeof(struct in_addr)) {
1138 /*
1139 * End of source route. Should be for us.
1140 */
1141 if (!ip_acceptsourceroute)
1142 goto nosourcerouting;
1143 save_rte(cp, ip->ip_src);
1144 break;
1145 }
1146
1147 if (!ip_dosourceroute) {
1148 if (ipforwarding) {
1149 char buf[16]; /* aaa.bbb.ccc.ddd\0 */
1150 /*
1151 * Acting as a router, so generate ICMP
1152 */
1153nosourcerouting:
1154 strcpy(buf, inet_ntoa(ip->ip_dst));
1155 log(LOG_WARNING,
1156 "attempted source route from %s to %s\n",
1157 inet_ntoa(ip->ip_src), buf);
1158 type = ICMP_UNREACH;
1159 code = ICMP_UNREACH_SRCFAIL;
1160 goto bad;
1161 } else {
1162 /*
1163 * Not acting as a router, so silently drop.
1164 */
1165 ipstat.ips_cantforward++;
1166 m_freem(m);
1167 return (1);
1168 }
1169 }
1170
1171 /*
1172 * locate outgoing interface
1173 */
1174 (void)memcpy(&ipaddr.sin_addr, cp + off,
1175 sizeof(ipaddr.sin_addr));
1176
1177 if (opt == IPOPT_SSRR) {
1178#define INA struct in_ifaddr *
1179#define SA struct sockaddr *
1180 if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
1181 ia = (INA)ifa_ifwithnet((SA)&ipaddr);
1182 } else
1183 ia = ip_rtaddr(ipaddr.sin_addr);
1184 if (ia == 0) {
1185 type = ICMP_UNREACH;
1186 code = ICMP_UNREACH_SRCFAIL;
1187 goto bad;
1188 }
1189 ip->ip_dst = ipaddr.sin_addr;
1190 (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
1191 sizeof(struct in_addr));
1192 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1193 /*
1194 * Let ip_intr's mcast routing check handle mcast pkts
1195 */
1196 forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
1197 break;
1198
1199 case IPOPT_RR:
1200 if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
1201 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1202 goto bad;
1203 }
1204 if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1205 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1206 goto bad;
1207 }
1208 /*
1209 * If no space remains, ignore.
1210 */
1211 off--; /* 0 origin */
1212 if (off > optlen - (int)sizeof(struct in_addr))
1213 break;
1214 (void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
1215 sizeof(ipaddr.sin_addr));
1216 /*
1217 * locate outgoing interface; if we're the destination,
1218 * use the incoming interface (should be same).
1219 */
1220 if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
1221 (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
1222 type = ICMP_UNREACH;
1223 code = ICMP_UNREACH_HOST;
1224 goto bad;
1225 }
1226 (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
1227 sizeof(struct in_addr));
1228 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1229 break;
1230
1231 case IPOPT_TS:
1232 code = cp - (u_char *)ip;
1233 ipt = (struct ip_timestamp *)cp;
1234 if (ipt->ipt_len < 5)
1235 goto bad;
1236 if (ipt->ipt_ptr >
1237 ipt->ipt_len - (int)sizeof(int32_t)) {
1238 if (++ipt->ipt_oflw == 0)
1239 goto bad;
1240 break;
1241 }
1242 sin = (struct in_addr *)(cp + ipt->ipt_ptr - 1);
1243 switch (ipt->ipt_flg) {
1244
1245 case IPOPT_TS_TSONLY:
1246 break;
1247
1248 case IPOPT_TS_TSANDADDR:
1249 if (ipt->ipt_ptr - 1 + sizeof(n_time) +
1250 sizeof(struct in_addr) > ipt->ipt_len)
1251 goto bad;
1252 ipaddr.sin_addr = dst;
1253 ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
1254 m->m_pkthdr.rcvif);
1255 if (ia == 0)
1256 continue;
1257 (void)memcpy(sin, &IA_SIN(ia)->sin_addr,
1258 sizeof(struct in_addr));
1259 ipt->ipt_ptr += sizeof(struct in_addr);
1260 break;
1261
1262 case IPOPT_TS_PRESPEC:
1263 if (ipt->ipt_ptr - 1 + sizeof(n_time) +
1264 sizeof(struct in_addr) > ipt->ipt_len)
1265 goto bad;
1266 (void)memcpy(&ipaddr.sin_addr, sin,
1267 sizeof(struct in_addr));
1268 if (ifa_ifwithaddr((SA)&ipaddr) == 0)
1269 continue;
1270 ipt->ipt_ptr += sizeof(struct in_addr);
1271 break;
1272
1273 default:
1274 goto bad;
1275 }
1276 ntime = iptime();
1277 (void)memcpy(cp + ipt->ipt_ptr - 1, &ntime,
1278 sizeof(n_time));
1279 ipt->ipt_ptr += sizeof(n_time);
1280 }
1281 }
1282 if (forward && ipforwarding) {
1283 ip_forward(m, 1);
1284 return (1);
1285 }
1286 return (0);
1287bad:
1288 icmp_error(m, type, code, 0, 0);
1289 ipstat.ips_badoptions++;
1290 return (1);
1291}
1292
1293/*
1294 * Given address of next destination (final or next hop),
1295 * return internet address info of interface to be used to get there.
1296 */
1297static struct in_ifaddr *
1298ip_rtaddr(dst)
1299 struct in_addr dst;
1300{
1301 register struct sockaddr_in *sin;
1302
1303 sin = (struct sockaddr_in *) &ipforward_rt.ro_dst;
1304
1305 if (ipforward_rt.ro_rt == 0 || dst.s_addr != sin->sin_addr.s_addr) {
1306 if (ipforward_rt.ro_rt) {
1307 RTFREE(ipforward_rt.ro_rt);
1308 ipforward_rt.ro_rt = 0;
1309 }
1310 sin->sin_family = AF_INET;
1311 sin->sin_len = sizeof(*sin);
1312 sin->sin_addr = dst;
1313
1314 rtalloc_ign(&ipforward_rt, RTF_PRCLONING);
1315 }
1316 if (ipforward_rt.ro_rt == 0)
1317 return ((struct in_ifaddr *)0);
1318 return ((struct in_ifaddr *) ipforward_rt.ro_rt->rt_ifa);
1319}
1320
1321/*
1322 * Save incoming source route for use in replies,
1323 * to be picked up later by ip_srcroute if the receiver is interested.
1324 */
1325void
1326save_rte(option, dst)
1327 u_char *option;
1328 struct in_addr dst;
1329{
1330 unsigned olen;
1331
1332 olen = option[IPOPT_OLEN];
1333#ifdef DIAGNOSTIC
1334 if (ipprintfs)
1335 printf("save_rte: olen %d\n", olen);
1336#endif
1337 if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
1338 return;
1339 bcopy(option, ip_srcrt.srcopt, olen);
1340 ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
1341 ip_srcrt.dst = dst;
1342}
1343
1344/*
1345 * Retrieve incoming source route for use in replies,
1346 * in the same form used by setsockopt.
1347 * The first hop is placed before the options, will be removed later.
1348 */
1349struct mbuf *
1350ip_srcroute()
1351{
1352 register struct in_addr *p, *q;
1353 register struct mbuf *m;
1354
1355 if (ip_nhops == 0)
1356 return ((struct mbuf *)0);
1357 m = m_get(M_DONTWAIT, MT_HEADER);
1358 if (m == 0)
1359 return ((struct mbuf *)0);
1360
1361#define OPTSIZ (sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
1362
1363 /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
1364 m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
1365 OPTSIZ;
1366#ifdef DIAGNOSTIC
1367 if (ipprintfs)
1368 printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
1369#endif
1370
1371 /*
1372 * First save first hop for return route
1373 */
1374 p = &ip_srcrt.route[ip_nhops - 1];
1375 *(mtod(m, struct in_addr *)) = *p--;
1376#ifdef DIAGNOSTIC
1377 if (ipprintfs)
1378 printf(" hops %lx", (u_long)ntohl(mtod(m, struct in_addr *)->s_addr));
1379#endif
1380
1381 /*
1382 * Copy option fields and padding (nop) to mbuf.
1383 */
1384 ip_srcrt.nop = IPOPT_NOP;
1385 ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
1386 (void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
1387 &ip_srcrt.nop, OPTSIZ);
1388 q = (struct in_addr *)(mtod(m, caddr_t) +
1389 sizeof(struct in_addr) + OPTSIZ);
1390#undef OPTSIZ
1391 /*
1392 * Record return path as an IP source route,
1393 * reversing the path (pointers are now aligned).
1394 */
1395 while (p >= ip_srcrt.route) {
1396#ifdef DIAGNOSTIC
1397 if (ipprintfs)
1398 printf(" %lx", (u_long)ntohl(q->s_addr));
1399#endif
1400 *q++ = *p--;
1401 }
1402 /*
1403 * Last hop goes to final destination.
1404 */
1405 *q = ip_srcrt.dst;
1406#ifdef DIAGNOSTIC
1407 if (ipprintfs)
1408 printf(" %lx\n", (u_long)ntohl(q->s_addr));
1409#endif
1410 return (m);
1411}
1412
1413/*
1414 * Strip out IP options, at higher
1415 * level protocol in the kernel.
1416 * Second argument is buffer to which options
1417 * will be moved, and return value is their length.
1418 * XXX should be deleted; last arg currently ignored.
1419 */
1420void
1421ip_stripoptions(m, mopt)
1422 register struct mbuf *m;
1423 struct mbuf *mopt;
1424{
1425 register int i;
1426 struct ip *ip = mtod(m, struct ip *);
1427 register caddr_t opts;
1428 int olen;
1429
1430 olen = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
1431 opts = (caddr_t)(ip + 1);
1432 i = m->m_len - (sizeof (struct ip) + olen);
1433 bcopy(opts + olen, opts, (unsigned)i);
1434 m->m_len -= olen;
1435 if (m->m_flags & M_PKTHDR)
1436 m->m_pkthdr.len -= olen;
1437 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, sizeof(struct ip) >> 2);
1438}
1439
1440u_char inetctlerrmap[PRC_NCMDS] = {
1441 0, 0, 0, 0,
1442 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH,
1443 EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED,
1444 EMSGSIZE, EHOSTUNREACH, 0, 0,
1445 0, 0, 0, 0,
1446 ENOPROTOOPT
1447};
1448
1449/*
1450 * Forward a packet. If some error occurs return the sender
1451 * an icmp packet. Note we can't always generate a meaningful
1452 * icmp message because icmp doesn't have a large enough repertoire
1453 * of codes and types.
1454 *
1455 * If not forwarding, just drop the packet. This could be confusing
1456 * if ipforwarding was zero but some routing protocol was advancing
1457 * us as a gateway to somewhere. However, we must let the routing
1458 * protocol deal with that.
1459 *
1460 * The srcrt parameter indicates whether the packet is being forwarded
1461 * via a source route.
1462 */
/*
 * Arguments:
 *	m	packet to forward; on every path it is either freed here
 *		or handed to icmp_error() / ip_output().
 *	srcrt	nonzero when the packet is being source routed; this
 *		suppresses the ICMP redirect check.
 */
1463static void
1464ip_forward(m, srcrt)
1465 struct mbuf *m;
1466 int srcrt;
1467{
1468 register struct ip *ip = mtod(m, struct ip *);
1469 register struct sockaddr_in *sin;
1470 register struct rtentry *rt;
1471 int error, type = 0, code = 0;
1472 struct mbuf *mcopy;
1473 n_long dest;
1474 struct ifnet *destifp;
1475#ifdef IPSEC
1476 struct ifnet dummyifp;
1477#endif
1478
1479 dest = 0;
1480#ifdef DIAGNOSTIC
1481 if (ipprintfs)
1482 printf("forward: src %lx dst %lx ttl %x\n",
1483 (u_long)ip->ip_src.s_addr, (u_long)ip->ip_dst.s_addr,
1484 ip->ip_ttl);
1485#endif
1486
1487
/* link-level broadcast/multicast and non-forwardable destinations are dropped */
1488 if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
1489 ipstat.ips_cantforward++;
1490 m_freem(m);
1491 return;
1492 }
/* with IPSTEALTH compiled in and ipstealth set, the TTL check is skipped */
1493#ifdef IPSTEALTH
1494 if (!ipstealth) {
1495#endif
1496 if (ip->ip_ttl <= IPTTLDEC) {
1497 icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
1498 dest, 0);
1499 return;
1500 }
1501#ifdef IPSTEALTH
1502 }
1503#endif
1504
/*
 * Reuse the route cached in ipforward_rt when it is for the same
 * destination; otherwise release it and allocate a new one.
 */
1505 sin = (struct sockaddr_in *)&ipforward_rt.ro_dst;
1506 if ((rt = ipforward_rt.ro_rt) == 0 ||
1507 ip->ip_dst.s_addr != sin->sin_addr.s_addr) {
1508 if (ipforward_rt.ro_rt) {
1509 RTFREE(ipforward_rt.ro_rt);
1510 ipforward_rt.ro_rt = 0;
1511 }
1512 sin->sin_family = AF_INET;
1513 sin->sin_len = sizeof(*sin);
1514 sin->sin_addr = ip->ip_dst;
1515
1516 rtalloc_ign(&ipforward_rt, RTF_PRCLONING);
1517 if (ipforward_rt.ro_rt == 0) {
1518 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
1519 return;
1520 }
1521 rt = ipforward_rt.ro_rt;
1522 }
1523
1524 /*
1525 * Save at most 64 bytes of the packet in case
1526 * we need to generate an ICMP message to the src.
1527 */
1528 mcopy = m_copy(m, 0, imin((int)ip->ip_len, 64));
/*
 * NOTE(review): source and destination of this copy are the same
 * buffer (the start of mcopy's own data), so as written it copies
 * the header onto itself.  The intent is not clear from this
 * function - confirm.
 */
1529 if (mcopy && (mcopy->m_flags & M_EXT))
1530 m_copydata(mcopy, 0, sizeof(struct ip), mtod(mcopy, caddr_t));
1531
/* with IPSTEALTH compiled in and ipstealth set, the TTL is left untouched */
1532#ifdef IPSTEALTH
1533 if (!ipstealth) {
1534#endif
1535 ip->ip_ttl -= IPTTLDEC;
1536#ifdef IPSTEALTH
1537 }
1538#endif
1539
1540 /*
1541 * If forwarding packet using same interface that it came in on,
1542 * perhaps should send a redirect to sender to shortcut a hop.
1543 * Only send redirect if source is sending directly to us,
1544 * and if packet was not source routed (or has any options).
1545 * Also, don't send redirect if forwarding using a default route
1546 * or a route modified by a redirect.
1547 */
1548#define satosin(sa) ((struct sockaddr_in *)(sa))
1549 if (rt->rt_ifp == m->m_pkthdr.rcvif &&
1550 (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
1551 satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
1552 ipsendredirects && !srcrt) {
1553#define RTA(rt) ((struct in_ifaddr *)(rt->rt_ifa))
1554 u_long src = ntohl(ip->ip_src.s_addr);
1555
/* the sender must be on the subnet of the outgoing interface address */
1556 if (RTA(rt) &&
1557 (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
/* redirect target: the gateway for gateway routes, else the destination */
1558 if (rt->rt_flags & RTF_GATEWAY)
1559 dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
1560 else
1561 dest = ip->ip_dst.s_addr;
1562 /* Router requirements says to only send host redirects */
1563 type = ICMP_REDIRECT;
1564 code = ICMP_REDIRECT_HOST;
1565#ifdef DIAGNOSTIC
1566 if (ipprintfs)
1567 printf("redirect (%d) to %lx\n", code, (u_long)dest);
1568#endif
1569 }
1570 }
1571
1572 error = ip_output(m, (struct mbuf *)0, &ipforward_rt,
1573 IP_FORWARDING, 0);
1574 if (error)
1575 ipstat.ips_cantforward++;
1576 else {
1577 ipstat.ips_forward++;
1578 if (type)
1579 ipstat.ips_redirectsent++;
1580 else {
/*
 * Forwarded and no redirect wanted: pass the saved copy to
 * ipflow_create(), free it, and finish without sending any ICMP.
 */
1581 if (mcopy) {
1582 ipflow_create(&ipforward_rt, mcopy);
1583 m_freem(mcopy);
1584 }
1585 return;
1586 }
1587 }
/* without the saved copy there is nothing to build an ICMP message from */
1588 if (mcopy == NULL)
1589 return;
1590 destifp = NULL;
1591
/* translate the ip_output() result into an ICMP type/code */
1592 switch (error) {
1593
1594 case 0: /* forwarded, but need redirect */
1595 /* type, code set above */
1596 break;
1597
/* the named unreachable/down errors and any unlisted errno share this arm */
1598 case ENETUNREACH: /* shouldn't happen, checked above */
1599 case EHOSTUNREACH:
1600 case ENETDOWN:
1601 case EHOSTDOWN:
1602 default:
1603 type = ICMP_UNREACH;
1604 code = ICMP_UNREACH_HOST;
1605 break;
1606
1607 case EMSGSIZE:
1608 type = ICMP_UNREACH;
1609 code = ICMP_UNREACH_NEEDFRAG;
/* destifp is passed on to icmp_error() below; it stays NULL without a route */
1610#ifndef IPSEC
1611 if (ipforward_rt.ro_rt)
1612 destifp = ipforward_rt.ro_rt->rt_ifp;
1613#else
1614 /*
1615 * If the packet is routed over IPsec tunnel, tell the
1616 * originator the tunnel MTU.
1617 * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
1618 * XXX quickhack!!!
1619 */
1620 if (ipforward_rt.ro_rt) {
1621 struct secpolicy *sp = NULL;
1622 int ipsecerror;
1623 int ipsechdr;
1624 struct route *ro;
1625
1626 sp = ipsec4_getpolicybyaddr(mcopy,
1627 IPSEC_DIR_OUTBOUND,
1628 IP_FORWARDING,
1629 &ipsecerror);
1630
/* no policy found: report the outgoing interface as in the non-IPSEC case */
1631 if (sp == NULL)
1632 destifp = ipforward_rt.ro_rt->rt_ifp;
1633 else {
1634 /* count IPsec header size */
1635 ipsechdr = ipsec4_hdrsiz(mcopy,
1636 IPSEC_DIR_OUTBOUND,
1637 NULL);
1638
1639 /*
1640 * find the correct route for outer IPv4
1641 * header, compute tunnel MTU.
1642 *
1643 * XXX BUG ALERT
1644 * The "dummyifp" code relies upon the fact
1645 * that icmp_error() touches only ifp->if_mtu.
1646 */
1647 /*XXX*/
/* only if_mtu of dummyifp is ever assigned; every other field is uninitialized */
1648 destifp = NULL;
1649 if (sp->req != NULL
1650 && sp->req->sav != NULL
1651 && sp->req->sav->sah != NULL) {
1652 ro = &sp->req->sav->sah->sa_route;
1653 if (ro->ro_rt && ro->ro_rt->rt_ifp) {
1654 dummyifp.if_mtu =
1655 ro->ro_rt->rt_ifp->if_mtu;
1656 dummyifp.if_mtu -= ipsechdr;
1657 destifp = &dummyifp;
1658 }
1659 }
1660
1661 key_freesp(sp);
1662 }
1663 }
1664#endif /*IPSEC*/
1665 ipstat.ips_cantfrag++;
1666 break;
1667
1668 case ENOBUFS:
1669 type = ICMP_SOURCEQUENCH;
1670 code = 0;
1671 break;
1672
/* no ICMP is generated for this error; the saved copy is just released */
1673 case EACCES: /* ipfw denied packet */
1674 m_freem(mcopy);
1675 return;
1676 }
/*
 * NOTE(review): as with the m_copydata() call earlier, this writes
 * mcopy's leading bytes back onto themselves - confirm the intent.
 */
1677 if (mcopy->m_flags & M_EXT)
1678 m_copyback(mcopy, 0, sizeof(struct ip), mtod(mcopy, caddr_t));
1679 icmp_error(mcopy, type, code, dest, destifp);
1680}
1681
1682void
1683ip_savecontrol(inp, mp, ip, m)
1684 register struct inpcb *inp;
1685 register struct mbuf **mp;
1686 register struct ip *ip;
1687 register struct mbuf *m;
1688{
1689 if (inp->inp_socket->so_options & SO_TIMESTAMP) {
1690 struct timeval tv;
1691
1692 microtime(&tv);
1693 *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
1694 SCM_TIMESTAMP, SOL_SOCKET);
1695 if (*mp)
1696 mp = &(*mp)->m_next;
1697 }
1698 if (inp->inp_flags & INP_RECVDSTADDR) {
1699 *mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
1700 sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
1701 if (*mp)
1702 mp = &(*mp)->m_next;
1703 }
1704#ifdef notyet
1705 /* XXX
1706 * Moving these out of udp_input() made them even more broken
1707 * than they already were.
1708 */
1709 /* options were tossed already */
1710 if (inp->inp_flags & INP_RECVOPTS) {
1711 *mp = sbcreatecontrol((caddr_t) opts_deleted_above,
1712 sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
1713 if (*mp)
1714 mp = &(*mp)->m_next;
1715 }
1716 /* ip_srcroute doesn't do what we want here, need to fix */
1717 if (inp->inp_flags & INP_RECVRETOPTS) {
1718 *mp = sbcreatecontrol((caddr_t) ip_srcroute(),
1719 sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
1720 if (*mp)
1721 mp = &(*mp)->m_next;
1722 }
1723#endif
1724 if (inp->inp_flags & INP_RECVIF) {
1725 struct ifnet *ifp;
1726 struct sdlbuf {
1727 struct sockaddr_dl sdl;
1728 u_char pad[32];
1729 } sdlbuf;
1730 struct sockaddr_dl *sdp;
1731 struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
1732
1733 if (((ifp = m->m_pkthdr.rcvif))
1734 && ( ifp->if_index && (ifp->if_index <= if_index))) {
1735 sdp = (struct sockaddr_dl *)(ifnet_addrs
1736 [ifp->if_index - 1]->ifa_addr);
1737 /*
1738 * Change our mind and don't try copy.
1739 */
1740 if ((sdp->sdl_family != AF_LINK)
1741 || (sdp->sdl_len > sizeof(sdlbuf))) {
1742 goto makedummy;
1743 }
1744 bcopy(sdp, sdl2, sdp->sdl_len);
1745 } else {
1746makedummy:
1747 sdl2->sdl_len
1748 = offsetof(struct sockaddr_dl, sdl_data[0]);
1749 sdl2->sdl_family = AF_LINK;
1750 sdl2->sdl_index = 0;
1751 sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
1752 }
1753 *mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len,
1754 IP_RECVIF, IPPROTO_IP);
1755 if (*mp)
1756 mp = &(*mp)->m_next;
1757 }
1758}
1759
1760int
1761ip_rsvp_init(struct socket *so)
1762{
1763 if (so->so_type != SOCK_RAW ||
1764 so->so_proto->pr_protocol != IPPROTO_RSVP)
1765 return EOPNOTSUPP;
1766
1767 if (ip_rsvpd != NULL)
1768 return EADDRINUSE;
1769
1770 ip_rsvpd = so;
1771 /*
1772 * This may seem silly, but we need to be sure we don't over-increment
1773 * the RSVP counter, in case something slips up.
1774 */
1775 if (!ip_rsvp_on) {
1776 ip_rsvp_on = 1;
1777 rsvp_on++;
1778 }
1779
1780 return 0;
1781}
1782
1783int
1784ip_rsvp_done(void)
1785{
1786 ip_rsvpd = NULL;
1787 /*
1788 * This may seem silly, but we need to be sure we don't over-decrement
1789 * the RSVP counter, in case something slips up.
1790 */
1791 if (ip_rsvp_on) {
1792 ip_rsvp_on = 0;
1793 rsvp_on--;
1794 }
1795 return 0;
1796}