Deleted Added
full compact
ip_input.c (105218) ip_input.c (105586)
1/*
2 * Copyright (c) 1982, 1986, 1988, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)ip_input.c 8.2 (Berkeley) 1/4/94
1/*
2 * Copyright (c) 1982, 1986, 1988, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)ip_input.c 8.2 (Berkeley) 1/4/94
34 * $FreeBSD: head/sys/netinet/ip_input.c 105218 2002-10-16 09:01:48Z guido $
34 * $FreeBSD: head/sys/netinet/ip_input.c 105586 2002-10-20 22:52:07Z phk $
35 */
36
35 */
36
37#define _IP_VHL
38
39#include "opt_bootp.h"
40#include "opt_ipfw.h"
41#include "opt_ipdn.h"
42#include "opt_ipdivert.h"
43#include "opt_ipfilter.h"
44#include "opt_ipstealth.h"
45#include "opt_ipsec.h"
46#include "opt_mac.h"
47#include "opt_pfil_hooks.h"
48#include "opt_random_ip_id.h"
49
50#include <sys/param.h>
51#include <sys/systm.h>
52#include <sys/mac.h>
53#include <sys/mbuf.h>
54#include <sys/malloc.h>
55#include <sys/domain.h>
56#include <sys/protosw.h>
57#include <sys/socket.h>
58#include <sys/time.h>
59#include <sys/kernel.h>
60#include <sys/syslog.h>
61#include <sys/sysctl.h>
62
63#include <net/pfil.h>
64#include <net/if.h>
65#include <net/if_types.h>
66#include <net/if_var.h>
67#include <net/if_dl.h>
68#include <net/route.h>
69#include <net/netisr.h>
70#include <net/intrq.h>
71
72#include <netinet/in.h>
73#include <netinet/in_systm.h>
74#include <netinet/in_var.h>
75#include <netinet/ip.h>
76#include <netinet/in_pcb.h>
77#include <netinet/ip_var.h>
78#include <netinet/ip_icmp.h>
79#include <machine/in_cksum.h>
80
81#include <sys/socketvar.h>
82
83#include <netinet/ip_fw.h>
84#include <netinet/ip_dummynet.h>
85
86#ifdef IPSEC
87#include <netinet6/ipsec.h>
88#include <netkey/key.h>
89#endif
90
91#ifdef FAST_IPSEC
92#include <netipsec/ipsec.h>
93#include <netipsec/key.h>
94#endif
95
/*
 * Run-time tunables.  Each SYSCTL_INT() below registers the variable
 * named in its fifth argument under the _net_inet_ip sysctl node.
 */

/*
 * While nonzero, ip_input() sends every IPPROTO_RSVP packet to local
 * delivery regardless of its destination.
 */
96int rsvp_on = 0;
97
/*
 * When 0, ip_input() frees packets that are not for this host and counts
 * them in ipstat.ips_cantforward; otherwise they go to ip_forward().
 */
98int ipforwarding = 0;
99SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW,
100 &ipforwarding, 0, "Enable IP forwarding between interfaces");
101
102static int ipsendredirects = 1; /* XXX */
103SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
104 &ipsendredirects, 0, "Enable sending IP redirects");
105
/*
 * NOTE(review): the name and the IPDEFTTL initializer suggest this is the
 * default TTL, although the sysctl text below says "Maximum" -- confirm.
 */
106int ip_defttl = IPDEFTTL;
107SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
108 &ip_defttl, 0, "Maximum TTL on IP packets");
109
110static int ip_dosourceroute = 0;
111SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
112 &ip_dosourceroute, 0, "Enable forwarding source routed IP packets");
113
114static int ip_acceptsourceroute = 0;
115SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
116 CTLFLAG_RW, &ip_acceptsourceroute, 0,
117 "Enable accepting source routed IP packets");
118
/*
 * For packets received on an IFT_FAITH interface, ip_input() delivers TCP
 * and ICMP locally only while this is nonzero; everything else arriving on
 * such an interface is freed.
 */
119static int ip_keepfaith = 0;
120SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
121 &ip_keepfaith, 0,
122 "Enable packet capture for FAITH IPv4->IPv6 translater daemon");
123
/* ip_maxfragpackets is set to nmbclusters / 4 by ip_init(). */
124static int ip_nfragpackets = 0;
125static int ip_maxfragpackets; /* initialized in ip_init() */
126SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragpackets, CTLFLAG_RW,
127 &ip_maxfragpackets, 0,
128 "Maximum number of IPv4 fragment reassembly queue entries");
129
130/*
131 * XXX - Setting ip_checkinterface mostly implements the receive side of
132 * the Strong ES model described in RFC 1122, but since the routing table
133 * and transmit implementation do not implement the Strong ES model,
134 * setting this to 1 results in an odd hybrid.
135 *
136 * XXX - ip_checkinterface currently must be disabled if you use ipnat
137 * to translate the destination address to another local interface.
138 *
139 * XXX - ip_checkinterface must be disabled if you add IP aliases
140 * to the loopback interface instead of the interface where the
141 * packets for those addresses are received.
142 */
143static int ip_checkinterface = 1;
144SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW,
145 &ip_checkinterface, 0, "Verify packet arrives on correct interface");
146
147#ifdef DIAGNOSTIC
148static int ipprintfs = 0;
149#endif
150
/* Copied into ipintrq.ifq_maxlen by ip_init(). */
151static int ipqmaxlen = IFQ_MAXLEN;
152
153extern struct domain inetdomain;
154extern struct protosw inetsw[];
/*
 * Maps an IP protocol number to an index into inetsw[].  ip_init() points
 * every slot at the raw IP entry and then overrides the slots of the
 * protocols found in inetdomain's protocol switch.
 */
155u_char ip_protox[IPPROTO_MAX];
/* The list and hash table below are initialized by ip_init(). */
156struct in_ifaddrhead in_ifaddrhead; /* first inet address */
157struct in_ifaddrhashhead *in_ifaddrhashtbl; /* inet addr hash table */
158u_long in_ifaddrhmask; /* mask for hash table */
159
/* IP input queue limit (read-write) and drop counter (read-only). */
160SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW,
161 &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue");
162SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD,
163 &ipintrq.ifq_drops, 0, "Number of packets dropped from the IP input queue");
164
/*
 * Counters updated by ip_input() (ips_total, ips_badsum, ...).  The whole
 * structure is exported with CTLFLAG_RW.
 */
165struct ipstat ipstat;
166SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW,
167 &ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)");
168
169/* Packet reassembly stuff */
/* 2^6 = 64 hash buckets; IPREASS_HMASK reduces a value to a bucket index. */
170#define IPREASS_NHASH_LOG2 6
171#define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2)
172#define IPREASS_HMASK (IPREASS_NHASH - 1)
/*
 * Bucket index for (x, y): bits 0-3 of x form the low nibble, bits 8-11 of
 * x the next nibble; the result is XORed with y and masked with
 * IPREASS_HMASK.
 */
173#define IPREASS_HASH(x,y) \
174 (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)
175
/* One queue head per bucket; each is TAILQ_INIT()ed by ip_init(). */
176static TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH];
177static int nipq = 0; /* total # of reass queues */
/* Set to nmbclusters / 4 by ip_init(). */
178static int maxnipq;
179
180#ifdef IPCTL_DEFMTU
181SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
182 &ip_mtu, 0, "Default MTU");
183#endif
184
185#ifdef IPSTEALTH
/*
 * While nonzero, ip_input() runs ip_dooptions(m, 1, ...) on locally
 * destined packets that carry IP options.
 */
186static int ipstealth = 0;
187SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
188 &ipstealth, 0, "");
189#endif
190
191
192/* Firewall hooks */
/*
 * Firewall check routine.  ip_input() calls it only when fw_enable is
 * nonzero and IPFW_LOADED holds.
 */
193ip_fw_chk_t *ip_fw_chk_ptr;
/* Nonzero allows ip_input() to consult the firewall hook. */
194int fw_enable = 1 ;
195
196/* Dummynet hooks */
/*
 * ip_input() calls this when DUMMYNET_LOADED holds and the firewall
 * verdict has IP_FW_PORT_DYNT_FLAG set.
 */
197ip_dn_io_t *ip_dn_io_ptr;
198
199
200/*
201 * XXX this is ugly -- the following two global variables are
202 * used to store packet state while it travels through the stack.
203 * Note that the code even makes assumptions on the size and
204 * alignment of fields inside struct ip_srcrt so e.g. adding some
205 * fields will break the code. This needs to be fixed.
206 *
207 * We need to save the IP options in case a protocol wants to respond
208 * to an incoming packet over the same route if the packet got here
209 * using IP source routing. This allows connection establishment and
210 * maintenance when the remote end is on a network that is not known
211 * to us.
212 */
/* Reset to 0 by ip_input() before the options of each packet are processed. */
213static int ip_nhops = 0;
214static struct ip_srcrt {
215 struct in_addr dst; /* final destination */
216 char nop; /* one NOP to align */
217 char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */
/* Room for as many addresses as fit in MAX_IPOPTLEN bytes. */
218 struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
219} ip_srcrt;
220
/* Prototypes for the file-local helpers. */
221static void save_rte(u_char *, struct in_addr);
/*
 * Per the comment at its call site in ip_input(), ip_dooptions() returns 1
 * when an error was detected (an ICMP message was sent and the packet freed).
 */
222static int ip_dooptions(struct mbuf *m, int,
223 struct sockaddr_in *next_hop);
224static void ip_forward(struct mbuf *m, int srcrt,
225 struct sockaddr_in *next_hop);
226static void ip_freef(struct ipqhead *, struct ipq *);
227static struct mbuf *ip_reass(struct mbuf *, struct ipqhead *,
228 struct ipq *, u_int32_t *, u_int16_t *);
/* Registered as the NETISR_IP handler by ip_init(). */
229static void ipintr(void);
230
231/*
232 * IP initialization: fill in IP protocol switch table.
233 * All protocols not implemented in kernel go to raw IP protocol handler.
234 */
235void
236ip_init()
237{
238 register struct protosw *pr;
239 register int i;
240
241 TAILQ_INIT(&in_ifaddrhead);
242 in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &in_ifaddrhmask);
243 pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
244 if (pr == 0)
245 panic("ip_init");
246 for (i = 0; i < IPPROTO_MAX; i++)
247 ip_protox[i] = pr - inetsw;
248 for (pr = inetdomain.dom_protosw;
249 pr < inetdomain.dom_protoswNPROTOSW; pr++)
250 if (pr->pr_domain->dom_family == PF_INET &&
251 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
252 ip_protox[pr->pr_protocol] = pr - inetsw;
253
254 for (i = 0; i < IPREASS_NHASH; i++)
255 TAILQ_INIT(&ipq[i]);
256
257 maxnipq = nmbclusters / 4;
258 ip_maxfragpackets = nmbclusters / 4;
259
260#ifndef RANDOM_IP_ID
261 ip_id = time_second & 0xffff;
262#endif
263 ipintrq.ifq_maxlen = ipqmaxlen;
264 mtx_init(&ipintrq.ifq_mtx, "ip_inq", NULL, MTX_DEF);
265 ipintrq_present = 1;
266
267 register_netisr(NETISR_IP, ipintr);
268}
269
270/*
271 * XXX Be careful with this one.  It appears to be used as a cache for
272 * the most recently used route; the original note says it is cleared in
273 * in_addroute() when a new route is successfully created.
274 */
275struct route ipforward_rt;
276
277/*
278 * Ip input routine. Checksum and byte swap header. If fragmented
279 * try to reassemble. Process options. Pass to next level.
280 */
281void
282ip_input(struct mbuf *m)
283{
284 struct ip *ip;
285 struct ipq *fp;
286 struct in_ifaddr *ia = NULL;
287 struct ifaddr *ifa;
288 int i, hlen, checkif;
289 u_short sum;
290 struct in_addr pkt_dst;
291 u_int32_t divert_info = 0; /* packet divert/tee info */
292 struct ip_fw_args args;
293#ifdef PFIL_HOOKS
294 struct packet_filter_hook *pfh;
295 struct mbuf *m0;
296 int rv;
297#endif /* PFIL_HOOKS */
298#ifdef FAST_IPSEC
299 struct m_tag *mtag;
300 struct tdb_ident *tdbi;
301 struct secpolicy *sp;
302 int s, error;
303#endif /* FAST_IPSEC */
304
305 args.eh = NULL;
306 args.oif = NULL;
307 args.rule = NULL;
308 args.divert_rule = 0; /* divert cookie */
309 args.next_hop = NULL;
310
311 /* Grab info from MT_TAG mbufs prepended to the chain. */
312 for (; m && m->m_type == MT_TAG; m = m->m_next) {
313 switch(m->_m_tag_id) {
314 default:
315 printf("ip_input: unrecognised MT_TAG tag %d\n",
316 m->_m_tag_id);
317 break;
318
319 case PACKET_TAG_DUMMYNET:
320 args.rule = ((struct dn_pkt *)m)->rule;
321 break;
322
323 case PACKET_TAG_DIVERT:
324 args.divert_rule = (intptr_t)m->m_hdr.mh_data & 0xffff;
325 break;
326
327 case PACKET_TAG_IPFORWARD:
328 args.next_hop = (struct sockaddr_in *)m->m_hdr.mh_data;
329 break;
330 }
331 }
332
333 KASSERT(m != NULL && (m->m_flags & M_PKTHDR) != 0,
334 ("ip_input: no HDR"));
335
336 if (args.rule) { /* dummynet already filtered us */
337 ip = mtod(m, struct ip *);
37#include "opt_bootp.h"
38#include "opt_ipfw.h"
39#include "opt_ipdn.h"
40#include "opt_ipdivert.h"
41#include "opt_ipfilter.h"
42#include "opt_ipstealth.h"
43#include "opt_ipsec.h"
44#include "opt_mac.h"
45#include "opt_pfil_hooks.h"
46#include "opt_random_ip_id.h"
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/mac.h>
51#include <sys/mbuf.h>
52#include <sys/malloc.h>
53#include <sys/domain.h>
54#include <sys/protosw.h>
55#include <sys/socket.h>
56#include <sys/time.h>
57#include <sys/kernel.h>
58#include <sys/syslog.h>
59#include <sys/sysctl.h>
60
61#include <net/pfil.h>
62#include <net/if.h>
63#include <net/if_types.h>
64#include <net/if_var.h>
65#include <net/if_dl.h>
66#include <net/route.h>
67#include <net/netisr.h>
68#include <net/intrq.h>
69
70#include <netinet/in.h>
71#include <netinet/in_systm.h>
72#include <netinet/in_var.h>
73#include <netinet/ip.h>
74#include <netinet/in_pcb.h>
75#include <netinet/ip_var.h>
76#include <netinet/ip_icmp.h>
77#include <machine/in_cksum.h>
78
79#include <sys/socketvar.h>
80
81#include <netinet/ip_fw.h>
82#include <netinet/ip_dummynet.h>
83
84#ifdef IPSEC
85#include <netinet6/ipsec.h>
86#include <netkey/key.h>
87#endif
88
89#ifdef FAST_IPSEC
90#include <netipsec/ipsec.h>
91#include <netipsec/key.h>
92#endif
93
/*
 * Run-time tunables.  Each SYSCTL_INT() below registers the variable
 * named in its fifth argument under the _net_inet_ip sysctl node.
 */

/*
 * While nonzero, ip_input() sends every IPPROTO_RSVP packet to local
 * delivery regardless of its destination.
 */
94int rsvp_on = 0;
95
/*
 * When 0, ip_input() frees packets that are not for this host and counts
 * them in ipstat.ips_cantforward; otherwise they go to ip_forward().
 */
96int ipforwarding = 0;
97SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW,
98 &ipforwarding, 0, "Enable IP forwarding between interfaces");
99
100static int ipsendredirects = 1; /* XXX */
101SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
102 &ipsendredirects, 0, "Enable sending IP redirects");
103
/*
 * NOTE(review): the name and the IPDEFTTL initializer suggest this is the
 * default TTL, although the sysctl text below says "Maximum" -- confirm.
 */
104int ip_defttl = IPDEFTTL;
105SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
106 &ip_defttl, 0, "Maximum TTL on IP packets");
107
108static int ip_dosourceroute = 0;
109SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
110 &ip_dosourceroute, 0, "Enable forwarding source routed IP packets");
111
112static int ip_acceptsourceroute = 0;
113SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
114 CTLFLAG_RW, &ip_acceptsourceroute, 0,
115 "Enable accepting source routed IP packets");
116
/*
 * For packets received on an IFT_FAITH interface, ip_input() delivers TCP
 * and ICMP locally only while this is nonzero; everything else arriving on
 * such an interface is freed.
 */
117static int ip_keepfaith = 0;
118SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
119 &ip_keepfaith, 0,
120 "Enable packet capture for FAITH IPv4->IPv6 translater daemon");
121
/* ip_maxfragpackets is set to nmbclusters / 4 by ip_init(). */
122static int ip_nfragpackets = 0;
123static int ip_maxfragpackets; /* initialized in ip_init() */
124SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragpackets, CTLFLAG_RW,
125 &ip_maxfragpackets, 0,
126 "Maximum number of IPv4 fragment reassembly queue entries");
127
128/*
129 * XXX - Setting ip_checkinterface mostly implements the receive side of
130 * the Strong ES model described in RFC 1122, but since the routing table
131 * and transmit implementation do not implement the Strong ES model,
132 * setting this to 1 results in an odd hybrid.
133 *
134 * XXX - ip_checkinterface currently must be disabled if you use ipnat
135 * to translate the destination address to another local interface.
136 *
137 * XXX - ip_checkinterface must be disabled if you add IP aliases
138 * to the loopback interface instead of the interface where the
139 * packets for those addresses are received.
140 */
141static int ip_checkinterface = 1;
142SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW,
143 &ip_checkinterface, 0, "Verify packet arrives on correct interface");
144
145#ifdef DIAGNOSTIC
146static int ipprintfs = 0;
147#endif
148
/* Copied into ipintrq.ifq_maxlen by ip_init(). */
149static int ipqmaxlen = IFQ_MAXLEN;
150
151extern struct domain inetdomain;
152extern struct protosw inetsw[];
/*
 * Maps an IP protocol number to an index into inetsw[].  ip_init() points
 * every slot at the raw IP entry and then overrides the slots of the
 * protocols found in inetdomain's protocol switch.
 */
153u_char ip_protox[IPPROTO_MAX];
/* The list and hash table below are initialized by ip_init(). */
154struct in_ifaddrhead in_ifaddrhead; /* first inet address */
155struct in_ifaddrhashhead *in_ifaddrhashtbl; /* inet addr hash table */
156u_long in_ifaddrhmask; /* mask for hash table */
157
/* IP input queue limit (read-write) and drop counter (read-only). */
158SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW,
159 &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue");
160SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD,
161 &ipintrq.ifq_drops, 0, "Number of packets dropped from the IP input queue");
162
/*
 * Counters updated by ip_input() (ips_total, ips_badsum, ...).  The whole
 * structure is exported with CTLFLAG_RW.
 */
163struct ipstat ipstat;
164SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW,
165 &ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)");
166
167/* Packet reassembly stuff */
/* 2^6 = 64 hash buckets; IPREASS_HMASK reduces a value to a bucket index. */
168#define IPREASS_NHASH_LOG2 6
169#define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2)
170#define IPREASS_HMASK (IPREASS_NHASH - 1)
/*
 * Bucket index for (x, y): bits 0-3 of x form the low nibble, bits 8-11 of
 * x the next nibble; the result is XORed with y and masked with
 * IPREASS_HMASK.
 */
171#define IPREASS_HASH(x,y) \
172 (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)
173
/* One queue head per bucket; each is TAILQ_INIT()ed by ip_init(). */
174static TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH];
175static int nipq = 0; /* total # of reass queues */
/* Set to nmbclusters / 4 by ip_init(). */
176static int maxnipq;
177
178#ifdef IPCTL_DEFMTU
179SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
180 &ip_mtu, 0, "Default MTU");
181#endif
182
183#ifdef IPSTEALTH
/*
 * While nonzero, ip_input() runs ip_dooptions(m, 1, ...) on locally
 * destined packets that carry IP options.
 */
184static int ipstealth = 0;
185SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
186 &ipstealth, 0, "");
187#endif
188
189
190/* Firewall hooks */
/*
 * Firewall check routine.  ip_input() calls it only when fw_enable is
 * nonzero and IPFW_LOADED holds.
 */
191ip_fw_chk_t *ip_fw_chk_ptr;
/* Nonzero allows ip_input() to consult the firewall hook. */
192int fw_enable = 1 ;
193
194/* Dummynet hooks */
/*
 * ip_input() calls this when DUMMYNET_LOADED holds and the firewall
 * verdict has IP_FW_PORT_DYNT_FLAG set.
 */
195ip_dn_io_t *ip_dn_io_ptr;
196
197
198/*
199 * XXX this is ugly -- the following two global variables are
200 * used to store packet state while it travels through the stack.
201 * Note that the code even makes assumptions on the size and
202 * alignment of fields inside struct ip_srcrt so e.g. adding some
203 * fields will break the code. This needs to be fixed.
204 *
205 * We need to save the IP options in case a protocol wants to respond
206 * to an incoming packet over the same route if the packet got here
207 * using IP source routing. This allows connection establishment and
208 * maintenance when the remote end is on a network that is not known
209 * to us.
210 */
/* Reset to 0 by ip_input() before the options of each packet are processed. */
211static int ip_nhops = 0;
212static struct ip_srcrt {
213 struct in_addr dst; /* final destination */
214 char nop; /* one NOP to align */
215 char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */
/* Room for as many addresses as fit in MAX_IPOPTLEN bytes. */
216 struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
217} ip_srcrt;
218
/* Prototypes for the file-local helpers. */
219static void save_rte(u_char *, struct in_addr);
/*
 * Per the comment at its call site in ip_input(), ip_dooptions() returns 1
 * when an error was detected (an ICMP message was sent and the packet freed).
 */
220static int ip_dooptions(struct mbuf *m, int,
221 struct sockaddr_in *next_hop);
222static void ip_forward(struct mbuf *m, int srcrt,
223 struct sockaddr_in *next_hop);
224static void ip_freef(struct ipqhead *, struct ipq *);
225static struct mbuf *ip_reass(struct mbuf *, struct ipqhead *,
226 struct ipq *, u_int32_t *, u_int16_t *);
/* Registered as the NETISR_IP handler by ip_init(). */
227static void ipintr(void);
228
229/*
230 * IP initialization: fill in IP protocol switch table.
231 * All protocols not implemented in kernel go to raw IP protocol handler.
232 */
233void
234ip_init()
235{
236 register struct protosw *pr;
237 register int i;
238
239 TAILQ_INIT(&in_ifaddrhead);
240 in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &in_ifaddrhmask);
241 pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
242 if (pr == 0)
243 panic("ip_init");
244 for (i = 0; i < IPPROTO_MAX; i++)
245 ip_protox[i] = pr - inetsw;
246 for (pr = inetdomain.dom_protosw;
247 pr < inetdomain.dom_protoswNPROTOSW; pr++)
248 if (pr->pr_domain->dom_family == PF_INET &&
249 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
250 ip_protox[pr->pr_protocol] = pr - inetsw;
251
252 for (i = 0; i < IPREASS_NHASH; i++)
253 TAILQ_INIT(&ipq[i]);
254
255 maxnipq = nmbclusters / 4;
256 ip_maxfragpackets = nmbclusters / 4;
257
258#ifndef RANDOM_IP_ID
259 ip_id = time_second & 0xffff;
260#endif
261 ipintrq.ifq_maxlen = ipqmaxlen;
262 mtx_init(&ipintrq.ifq_mtx, "ip_inq", NULL, MTX_DEF);
263 ipintrq_present = 1;
264
265 register_netisr(NETISR_IP, ipintr);
266}
267
268/*
269 * XXX Be careful with this one.  It appears to be used as a cache for
270 * the most recently used route; the original note says it is cleared in
271 * in_addroute() when a new route is successfully created.
272 */
273struct route ipforward_rt;
274
275/*
276 * Ip input routine. Checksum and byte swap header. If fragmented
277 * try to reassemble. Process options. Pass to next level.
278 */
279void
280ip_input(struct mbuf *m)
281{
282 struct ip *ip;
283 struct ipq *fp;
284 struct in_ifaddr *ia = NULL;
285 struct ifaddr *ifa;
286 int i, hlen, checkif;
287 u_short sum;
288 struct in_addr pkt_dst;
289 u_int32_t divert_info = 0; /* packet divert/tee info */
290 struct ip_fw_args args;
291#ifdef PFIL_HOOKS
292 struct packet_filter_hook *pfh;
293 struct mbuf *m0;
294 int rv;
295#endif /* PFIL_HOOKS */
296#ifdef FAST_IPSEC
297 struct m_tag *mtag;
298 struct tdb_ident *tdbi;
299 struct secpolicy *sp;
300 int s, error;
301#endif /* FAST_IPSEC */
302
303 args.eh = NULL;
304 args.oif = NULL;
305 args.rule = NULL;
306 args.divert_rule = 0; /* divert cookie */
307 args.next_hop = NULL;
308
309 /* Grab info from MT_TAG mbufs prepended to the chain. */
310 for (; m && m->m_type == MT_TAG; m = m->m_next) {
311 switch(m->_m_tag_id) {
312 default:
313 printf("ip_input: unrecognised MT_TAG tag %d\n",
314 m->_m_tag_id);
315 break;
316
317 case PACKET_TAG_DUMMYNET:
318 args.rule = ((struct dn_pkt *)m)->rule;
319 break;
320
321 case PACKET_TAG_DIVERT:
322 args.divert_rule = (intptr_t)m->m_hdr.mh_data & 0xffff;
323 break;
324
325 case PACKET_TAG_IPFORWARD:
326 args.next_hop = (struct sockaddr_in *)m->m_hdr.mh_data;
327 break;
328 }
329 }
330
331 KASSERT(m != NULL && (m->m_flags & M_PKTHDR) != 0,
332 ("ip_input: no HDR"));
333
334 if (args.rule) { /* dummynet already filtered us */
335 ip = mtod(m, struct ip *);
338 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
336 hlen = ip->ip_hl << 2;
339 goto iphack ;
340 }
341
342 ipstat.ips_total++;
343
344 if (m->m_pkthdr.len < sizeof(struct ip))
345 goto tooshort;
346
347 if (m->m_len < sizeof (struct ip) &&
348 (m = m_pullup(m, sizeof (struct ip))) == 0) {
349 ipstat.ips_toosmall++;
350 return;
351 }
352 ip = mtod(m, struct ip *);
353
337 goto iphack ;
338 }
339
340 ipstat.ips_total++;
341
342 if (m->m_pkthdr.len < sizeof(struct ip))
343 goto tooshort;
344
345 if (m->m_len < sizeof (struct ip) &&
346 (m = m_pullup(m, sizeof (struct ip))) == 0) {
347 ipstat.ips_toosmall++;
348 return;
349 }
350 ip = mtod(m, struct ip *);
351
354 if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
352 if (ip->ip_v != IPVERSION) {
355 ipstat.ips_badvers++;
356 goto bad;
357 }
358
353 ipstat.ips_badvers++;
354 goto bad;
355 }
356
359 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
357 hlen = ip->ip_hl << 2;
360 if (hlen < sizeof(struct ip)) { /* minimum header length */
361 ipstat.ips_badhlen++;
362 goto bad;
363 }
364 if (hlen > m->m_len) {
365 if ((m = m_pullup(m, hlen)) == 0) {
366 ipstat.ips_badhlen++;
367 return;
368 }
369 ip = mtod(m, struct ip *);
370 }
371
372 /* 127/8 must not appear on wire - RFC1122 */
373 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
374 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
375 if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
376 ipstat.ips_badaddr++;
377 goto bad;
378 }
379 }
380
381 if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
382 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
383 } else {
384 if (hlen == sizeof(struct ip)) {
385 sum = in_cksum_hdr(ip);
386 } else {
387 sum = in_cksum(m, hlen);
388 }
389 }
390 if (sum) {
391 ipstat.ips_badsum++;
392 goto bad;
393 }
394
395 /*
396 * Convert fields to host representation.
397 */
398 ip->ip_len = ntohs(ip->ip_len);
399 if (ip->ip_len < hlen) {
400 ipstat.ips_badlen++;
401 goto bad;
402 }
403 ip->ip_off = ntohs(ip->ip_off);
404
405 /*
406 * Check that the amount of data in the buffers
407 * is at least as much as the IP header would have us expect.
408 * Trim mbufs if longer than we expect.
409 * Drop packet if shorter than we expect.
410 */
411 if (m->m_pkthdr.len < ip->ip_len) {
412tooshort:
413 ipstat.ips_tooshort++;
414 goto bad;
415 }
416 if (m->m_pkthdr.len > ip->ip_len) {
417 if (m->m_len == m->m_pkthdr.len) {
418 m->m_len = ip->ip_len;
419 m->m_pkthdr.len = ip->ip_len;
420 } else
421 m_adj(m, ip->ip_len - m->m_pkthdr.len);
422 }
423
424 /*
425 * IpHack's section.
426 * Right now, while no processing has been done on the packet
427 * and it is still fresh off the network, we do our black
428 * deals with it.
429 * - Firewall: deny/allow/divert
430 * - Xlate: translate packet's addr/port (NAT).
431 * - Pipe: pass pkt through dummynet.
432 * - Wrap: fake packet's addr/port <unimpl.>
433 * - Encapsulate: put it in another IP and send out. <unimp.>
434 */
435
436iphack:
437
438#ifdef PFIL_HOOKS
439 /*
440 * Run through list of hooks for input packets. If there are any
441 * filters which require that additional packets in the flow are
442 * not fast-forwarded, they must clear the M_CANFASTFWD flag.
443 * Note that filters must _never_ set this flag, as another filter
444 * in the list may have previously cleared it.
445 */
446 m0 = m;
447 pfh = pfil_hook_get(PFIL_IN, &inetsw[ip_protox[IPPROTO_IP]].pr_pfh);
448 for (; pfh; pfh = TAILQ_NEXT(pfh, pfil_link))
449 if (pfh->pfil_func) {
450 rv = pfh->pfil_func(ip, hlen,
451 m->m_pkthdr.rcvif, 0, &m0);
452 if (rv)
453 return;
454 m = m0;
455 if (m == NULL)
456 return;
457 ip = mtod(m, struct ip *);
458 }
459#endif /* PFIL_HOOKS */
460
461 if (fw_enable && IPFW_LOADED) {
462 /*
463 * If we've been forwarded from the output side, then
464 * skip the firewall a second time
465 */
466 if (args.next_hop)
467 goto ours;
468
469 args.m = m;
470 i = ip_fw_chk_ptr(&args);
471 m = args.m;
472
473 if ( (i & IP_FW_PORT_DENY_FLAG) || m == NULL) { /* drop */
474 if (m)
475 m_freem(m);
476 return;
477 }
478 ip = mtod(m, struct ip *); /* just in case m changed */
479 if (i == 0 && args.next_hop == NULL) /* common case */
480 goto pass;
481 if (DUMMYNET_LOADED && (i & IP_FW_PORT_DYNT_FLAG) != 0) {
482 /* Send packet to the appropriate pipe */
483 ip_dn_io_ptr(m, i&0xffff, DN_TO_IP_IN, &args);
484 return;
485 }
486#ifdef IPDIVERT
487 if (i != 0 && (i & IP_FW_PORT_DYNT_FLAG) == 0) {
488 /* Divert or tee packet */
489 divert_info = i;
490 goto ours;
491 }
492#endif
493 if (i == 0 && args.next_hop != NULL)
494 goto pass;
495 /*
496 * if we get here, the packet must be dropped
497 */
498 m_freem(m);
499 return;
500 }
501pass:
502
503 /*
504 * Process options and, if not destined for us,
505 * ship it on. ip_dooptions returns 1 when an
506 * error was detected (causing an icmp message
507 * to be sent and the original packet to be freed).
508 */
509 ip_nhops = 0; /* for source routed packets */
510 if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, args.next_hop))
511 return;
512
513 /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
514 * matter if it is destined to another node, or whether it is
515 * a multicast one, RSVP wants it! and prevents it from being forwarded
516 * anywhere else. Also checks if the rsvp daemon is running before
517 * grabbing the packet.
518 */
519 if (rsvp_on && ip->ip_p==IPPROTO_RSVP)
520 goto ours;
521
522 /*
523 * Check our list of addresses, to see if the packet is for us.
524 * If we don't have any addresses, assume any unicast packet
525 * we receive might be for us (and let the upper layers deal
526 * with it).
527 */
528 if (TAILQ_EMPTY(&in_ifaddrhead) &&
529 (m->m_flags & (M_MCAST|M_BCAST)) == 0)
530 goto ours;
531
532 /*
533 * Cache the destination address of the packet; this may be
534 * changed by use of 'ipfw fwd'.
535 */
536 pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
537
538 /*
539 * Enable a consistency check between the destination address
540 * and the arrival interface for a unicast packet (the RFC 1122
541 * strong ES model) if IP forwarding is disabled and the packet
542 * is not locally generated and the packet is not subject to
543 * 'ipfw fwd'.
544 *
545 * XXX - Checking also should be disabled if the destination
546 * address is ipnat'ed to a different interface.
547 *
548 * XXX - Checking is incompatible with IP aliases added
549 * to the loopback interface instead of the interface where
550 * the packets are received.
551 */
552 checkif = ip_checkinterface && (ipforwarding == 0) &&
553 m->m_pkthdr.rcvif != NULL &&
554 ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) &&
555 (args.next_hop == NULL);
556
557 /*
558 * Check for exact addresses in the hash bucket.
559 */
560 LIST_FOREACH(ia, INADDR_HASH(pkt_dst.s_addr), ia_hash) {
561 /*
562 * If the address matches, verify that the packet
563 * arrived via the correct interface if checking is
564 * enabled.
565 */
566 if (IA_SIN(ia)->sin_addr.s_addr == pkt_dst.s_addr &&
567 (!checkif || ia->ia_ifp == m->m_pkthdr.rcvif))
568 goto ours;
569 }
570 /*
571 * Check for broadcast addresses.
572 *
573 * Only accept broadcast packets that arrive via the matching
574 * interface. Reception of forwarded directed broadcasts would
575 * be handled via ip_forward() and ether_output() with the loopback
576 * into the stack for SIMPLEX interfaces handled by ether_output().
577 */
578 if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
579 TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
580 if (ifa->ifa_addr->sa_family != AF_INET)
581 continue;
582 ia = ifatoia(ifa);
583 if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
584 pkt_dst.s_addr)
585 goto ours;
586 if (ia->ia_netbroadcast.s_addr == pkt_dst.s_addr)
587 goto ours;
588#ifdef BOOTP_COMPAT
589 if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY)
590 goto ours;
591#endif
592 }
593 }
594 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
595 struct in_multi *inm;
596 if (ip_mrouter) {
597 /*
598 * If we are acting as a multicast router, all
599 * incoming multicast packets are passed to the
600 * kernel-level multicast forwarding function.
601 * The packet is returned (relatively) intact; if
602 * ip_mforward() returns a non-zero value, the packet
603 * must be discarded, else it may be accepted below.
604 */
605 if (ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) {
606 ipstat.ips_cantforward++;
607 m_freem(m);
608 return;
609 }
610
611 /*
612 * The process-level routing daemon needs to receive
613 * all multicast IGMP packets, whether or not this
614 * host belongs to their destination groups.
615 */
616 if (ip->ip_p == IPPROTO_IGMP)
617 goto ours;
618 ipstat.ips_forward++;
619 }
620 /*
621 * See if we belong to the destination multicast group on the
622 * arrival interface.
623 */
624 IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
625 if (inm == NULL) {
626 ipstat.ips_notmember++;
627 m_freem(m);
628 return;
629 }
630 goto ours;
631 }
632 if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
633 goto ours;
634 if (ip->ip_dst.s_addr == INADDR_ANY)
635 goto ours;
636
637 /*
638 * FAITH(Firewall Aided Internet Translator)
639 */
640 if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
641 if (ip_keepfaith) {
642 if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP)
643 goto ours;
644 }
645 m_freem(m);
646 return;
647 }
648
649 /*
650 * Not for us; forward if possible and desirable.
651 */
652 if (ipforwarding == 0) {
653 ipstat.ips_cantforward++;
654 m_freem(m);
655 } else {
656#ifdef IPSEC
657 /*
658 * Enforce inbound IPsec SPD.
659 */
660 if (ipsec4_in_reject(m, NULL)) {
661 ipsecstat.in_polvio++;
662 goto bad;
663 }
664#endif /* IPSEC */
665#ifdef FAST_IPSEC
666 mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
667 s = splnet();
668 if (mtag != NULL) {
669 tdbi = (struct tdb_ident *)(mtag + 1);
670 sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
671 } else {
672 sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
673 IP_FORWARDING, &error);
674 }
675 if (sp == NULL) { /* NB: can happen if error */
676 splx(s);
677 /*XXX error stat???*/
678 DPRINTF(("ip_input: no SP for forwarding\n")); /*XXX*/
679 goto bad;
680 }
681
682 /*
683 * Check security policy against packet attributes.
684 */
685 error = ipsec_in_reject(sp, m);
686 KEY_FREESP(&sp);
687 splx(s);
688 if (error) {
689 ipstat.ips_cantforward++;
690 goto bad;
691 }
692#endif /* FAST_IPSEC */
693 ip_forward(m, 0, args.next_hop);
694 }
695 return;
696
697ours:
698#ifdef IPSTEALTH
699 /*
700 * IPSTEALTH: Process non-routing options only
701 * if the packet is destined for us.
702 */
703 if (ipstealth && hlen > sizeof (struct ip) &&
704 ip_dooptions(m, 1, args.next_hop))
705 return;
706#endif /* IPSTEALTH */
707
708 /* Count the packet in the ip address stats */
709 if (ia != NULL) {
710 ia->ia_ifa.if_ipackets++;
711 ia->ia_ifa.if_ibytes += m->m_pkthdr.len;
712 }
713
714 /*
715 * If offset or IP_MF are set, must reassemble.
716 * Otherwise, nothing need be done.
717 * (We could look in the reassembly queue to see
718 * if the packet was previously fragmented,
719 * but it's not worth the time; just let them time out.)
720 */
721 if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
722
723 sum = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
724 /*
725 * Look for queue of fragments
726 * of this datagram.
727 */
728 TAILQ_FOREACH(fp, &ipq[sum], ipq_list)
729 if (ip->ip_id == fp->ipq_id &&
730 ip->ip_src.s_addr == fp->ipq_src.s_addr &&
731 ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
732#ifdef MAC
733 mac_fragment_match(m, fp) &&
734#endif
735 ip->ip_p == fp->ipq_p)
736 goto found;
737
738 fp = 0;
739
740 /* check if there's a place for the new queue */
741 if (nipq > maxnipq) {
742 /*
743 * drop something from the tail of the current queue
744 * before proceeding further
745 */
746 struct ipq *q = TAILQ_LAST(&ipq[sum], ipqhead);
747 if (q == NULL) { /* gak */
748 for (i = 0; i < IPREASS_NHASH; i++) {
749 struct ipq *r = TAILQ_LAST(&ipq[i], ipqhead);
750 if (r) {
751 ip_freef(&ipq[i], r);
752 break;
753 }
754 }
755 } else
756 ip_freef(&ipq[sum], q);
757 }
758found:
759 /*
760 * Adjust ip_len to not reflect header,
761 * convert offset of this to bytes.
762 */
763 ip->ip_len -= hlen;
764 if (ip->ip_off & IP_MF) {
765 /*
766 * Make sure that fragments have a data length
767 * that's a non-zero multiple of 8 bytes.
768 */
769 if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
770 ipstat.ips_toosmall++; /* XXX */
771 goto bad;
772 }
773 m->m_flags |= M_FRAG;
774 } else
775 m->m_flags &= ~M_FRAG;
776 ip->ip_off <<= 3;
777
778 /*
779 * Attempt reassembly; if it succeeds, proceed.
780 * ip_reass() will return a different mbuf, and update
781 * the divert info in divert_info and args.divert_rule.
782 */
783 ipstat.ips_fragments++;
784 m->m_pkthdr.header = ip;
785 m = ip_reass(m,
786 &ipq[sum], fp, &divert_info, &args.divert_rule);
787 if (m == 0)
788 return;
789 ipstat.ips_reassembled++;
790 ip = mtod(m, struct ip *);
791 /* Get the header length of the reassembled packet */
358 if (hlen < sizeof(struct ip)) { /* minimum header length */
359 ipstat.ips_badhlen++;
360 goto bad;
361 }
362 if (hlen > m->m_len) {
363 if ((m = m_pullup(m, hlen)) == 0) {
364 ipstat.ips_badhlen++;
365 return;
366 }
367 ip = mtod(m, struct ip *);
368 }
369
370 /* 127/8 must not appear on wire - RFC1122 */
371 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
372 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
373 if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
374 ipstat.ips_badaddr++;
375 goto bad;
376 }
377 }
378
379 if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
380 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
381 } else {
382 if (hlen == sizeof(struct ip)) {
383 sum = in_cksum_hdr(ip);
384 } else {
385 sum = in_cksum(m, hlen);
386 }
387 }
388 if (sum) {
389 ipstat.ips_badsum++;
390 goto bad;
391 }
392
393 /*
394 * Convert fields to host representation.
395 */
396 ip->ip_len = ntohs(ip->ip_len);
397 if (ip->ip_len < hlen) {
398 ipstat.ips_badlen++;
399 goto bad;
400 }
401 ip->ip_off = ntohs(ip->ip_off);
402
403 /*
404 * Check that the amount of data in the buffers
405 * is at least as much as the IP header would have us expect.
406 * Trim mbufs if longer than we expect.
407 * Drop packet if shorter than we expect.
408 */
409 if (m->m_pkthdr.len < ip->ip_len) {
410tooshort:
411 ipstat.ips_tooshort++;
412 goto bad;
413 }
414 if (m->m_pkthdr.len > ip->ip_len) {
415 if (m->m_len == m->m_pkthdr.len) {
416 m->m_len = ip->ip_len;
417 m->m_pkthdr.len = ip->ip_len;
418 } else
419 m_adj(m, ip->ip_len - m->m_pkthdr.len);
420 }
421
422 /*
423 * IpHack's section.
424 * Right now when no processing on packet has been done
425 * and it is still fresh out of network we do our black
426 * deals with it.
427 * - Firewall: deny/allow/divert
428 * - Xlate: translate packet's addr/port (NAT).
429 * - Pipe: pass pkt through dummynet.
430 * - Wrap: fake packet's addr/port <unimpl.>
431 * - Encapsulate: put it in another IP and send out. <unimp.>
432 */
433
434iphack:
435
436#ifdef PFIL_HOOKS
437 /*
438 * Run through list of hooks for input packets. If there are any
439 * filters which require that additional packets in the flow are
440 * not fast-forwarded, they must clear the M_CANFASTFWD flag.
441 * Note that filters must _never_ set this flag, as another filter
442 * in the list may have previously cleared it.
443 */
444 m0 = m;
445 pfh = pfil_hook_get(PFIL_IN, &inetsw[ip_protox[IPPROTO_IP]].pr_pfh);
446 for (; pfh; pfh = TAILQ_NEXT(pfh, pfil_link))
447 if (pfh->pfil_func) {
448 rv = pfh->pfil_func(ip, hlen,
449 m->m_pkthdr.rcvif, 0, &m0);
450 if (rv)
451 return;
452 m = m0;
453 if (m == NULL)
454 return;
455 ip = mtod(m, struct ip *);
456 }
457#endif /* PFIL_HOOKS */
458
459 if (fw_enable && IPFW_LOADED) {
460 /*
461 * If we've been forwarded from the output side, then
462 * skip the firewall a second time
463 */
464 if (args.next_hop)
465 goto ours;
466
467 args.m = m;
468 i = ip_fw_chk_ptr(&args);
469 m = args.m;
470
471 if ( (i & IP_FW_PORT_DENY_FLAG) || m == NULL) { /* drop */
472 if (m)
473 m_freem(m);
474 return;
475 }
476 ip = mtod(m, struct ip *); /* just in case m changed */
477 if (i == 0 && args.next_hop == NULL) /* common case */
478 goto pass;
479 if (DUMMYNET_LOADED && (i & IP_FW_PORT_DYNT_FLAG) != 0) {
480 /* Send packet to the appropriate pipe */
481 ip_dn_io_ptr(m, i&0xffff, DN_TO_IP_IN, &args);
482 return;
483 }
484#ifdef IPDIVERT
485 if (i != 0 && (i & IP_FW_PORT_DYNT_FLAG) == 0) {
486 /* Divert or tee packet */
487 divert_info = i;
488 goto ours;
489 }
490#endif
491 if (i == 0 && args.next_hop != NULL)
492 goto pass;
493 /*
494 * if we get here, the packet must be dropped
495 */
496 m_freem(m);
497 return;
498 }
499pass:
500
501 /*
502 * Process options and, if not destined for us,
503 * ship it on. ip_dooptions returns 1 when an
504 * error was detected (causing an icmp message
505 * to be sent and the original packet to be freed).
506 */
507 ip_nhops = 0; /* for source routed packets */
508 if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, args.next_hop))
509 return;
510
511 /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
512 * matter if it is destined to another node, or whether it is
513 * a multicast one, RSVP wants it! and prevents it from being forwarded
514 * anywhere else. Also checks if the rsvp daemon is running before
515 * grabbing the packet.
516 */
517 if (rsvp_on && ip->ip_p==IPPROTO_RSVP)
518 goto ours;
519
520 /*
521 * Check our list of addresses, to see if the packet is for us.
522 * If we don't have any addresses, assume any unicast packet
523 * we receive might be for us (and let the upper layers deal
524 * with it).
525 */
526 if (TAILQ_EMPTY(&in_ifaddrhead) &&
527 (m->m_flags & (M_MCAST|M_BCAST)) == 0)
528 goto ours;
529
530 /*
531 * Cache the destination address of the packet; this may be
532 * changed by use of 'ipfw fwd'.
533 */
534 pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
535
536 /*
537 * Enable a consistency check between the destination address
538 * and the arrival interface for a unicast packet (the RFC 1122
539 * strong ES model) if IP forwarding is disabled and the packet
540 * is not locally generated and the packet is not subject to
541 * 'ipfw fwd'.
542 *
543 * XXX - Checking also should be disabled if the destination
544 * address is ipnat'ed to a different interface.
545 *
546 * XXX - Checking is incompatible with IP aliases added
547 * to the loopback interface instead of the interface where
548 * the packets are received.
549 */
550 checkif = ip_checkinterface && (ipforwarding == 0) &&
551 m->m_pkthdr.rcvif != NULL &&
552 ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) &&
553 (args.next_hop == NULL);
554
555 /*
556 * Check for exact addresses in the hash bucket.
557 */
558 LIST_FOREACH(ia, INADDR_HASH(pkt_dst.s_addr), ia_hash) {
559 /*
560 * If the address matches, verify that the packet
561 * arrived via the correct interface if checking is
562 * enabled.
563 */
564 if (IA_SIN(ia)->sin_addr.s_addr == pkt_dst.s_addr &&
565 (!checkif || ia->ia_ifp == m->m_pkthdr.rcvif))
566 goto ours;
567 }
568 /*
569 * Check for broadcast addresses.
570 *
571 * Only accept broadcast packets that arrive via the matching
572 * interface. Reception of forwarded directed broadcasts would
573 * be handled via ip_forward() and ether_output() with the loopback
574 * into the stack for SIMPLEX interfaces handled by ether_output().
575 */
576 if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
577 TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
578 if (ifa->ifa_addr->sa_family != AF_INET)
579 continue;
580 ia = ifatoia(ifa);
581 if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
582 pkt_dst.s_addr)
583 goto ours;
584 if (ia->ia_netbroadcast.s_addr == pkt_dst.s_addr)
585 goto ours;
586#ifdef BOOTP_COMPAT
587 if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY)
588 goto ours;
589#endif
590 }
591 }
592 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
593 struct in_multi *inm;
594 if (ip_mrouter) {
595 /*
596 * If we are acting as a multicast router, all
597 * incoming multicast packets are passed to the
598 * kernel-level multicast forwarding function.
599 * The packet is returned (relatively) intact; if
600 * ip_mforward() returns a non-zero value, the packet
601 * must be discarded, else it may be accepted below.
602 */
603 if (ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) {
604 ipstat.ips_cantforward++;
605 m_freem(m);
606 return;
607 }
608
609 /*
610 * The process-level routing daemon needs to receive
611 * all multicast IGMP packets, whether or not this
612 * host belongs to their destination groups.
613 */
614 if (ip->ip_p == IPPROTO_IGMP)
615 goto ours;
616 ipstat.ips_forward++;
617 }
618 /*
619 * See if we belong to the destination multicast group on the
620 * arrival interface.
621 */
622 IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
623 if (inm == NULL) {
624 ipstat.ips_notmember++;
625 m_freem(m);
626 return;
627 }
628 goto ours;
629 }
630 if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
631 goto ours;
632 if (ip->ip_dst.s_addr == INADDR_ANY)
633 goto ours;
634
635 /*
636 * FAITH(Firewall Aided Internet Translator)
637 */
638 if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
639 if (ip_keepfaith) {
640 if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP)
641 goto ours;
642 }
643 m_freem(m);
644 return;
645 }
646
647 /*
648 * Not for us; forward if possible and desirable.
649 */
650 if (ipforwarding == 0) {
651 ipstat.ips_cantforward++;
652 m_freem(m);
653 } else {
654#ifdef IPSEC
655 /*
656 * Enforce inbound IPsec SPD.
657 */
658 if (ipsec4_in_reject(m, NULL)) {
659 ipsecstat.in_polvio++;
660 goto bad;
661 }
662#endif /* IPSEC */
663#ifdef FAST_IPSEC
664 mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
665 s = splnet();
666 if (mtag != NULL) {
667 tdbi = (struct tdb_ident *)(mtag + 1);
668 sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
669 } else {
670 sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
671 IP_FORWARDING, &error);
672 }
673 if (sp == NULL) { /* NB: can happen if error */
674 splx(s);
675 /*XXX error stat???*/
676 DPRINTF(("ip_input: no SP for forwarding\n")); /*XXX*/
677 goto bad;
678 }
679
680 /*
681 * Check security policy against packet attributes.
682 */
683 error = ipsec_in_reject(sp, m);
684 KEY_FREESP(&sp);
685 splx(s);
686 if (error) {
687 ipstat.ips_cantforward++;
688 goto bad;
689 }
690#endif /* FAST_IPSEC */
691 ip_forward(m, 0, args.next_hop);
692 }
693 return;
694
695ours:
696#ifdef IPSTEALTH
697 /*
698 * IPSTEALTH: Process non-routing options only
699 * if the packet is destined for us.
700 */
701 if (ipstealth && hlen > sizeof (struct ip) &&
702 ip_dooptions(m, 1, args.next_hop))
703 return;
704#endif /* IPSTEALTH */
705
706 /* Count the packet in the ip address stats */
707 if (ia != NULL) {
708 ia->ia_ifa.if_ipackets++;
709 ia->ia_ifa.if_ibytes += m->m_pkthdr.len;
710 }
711
712 /*
713 * If offset or IP_MF are set, must reassemble.
714 * Otherwise, nothing need be done.
715 * (We could look in the reassembly queue to see
716 * if the packet was previously fragmented,
717 * but it's not worth the time; just let them time out.)
718 */
719 if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
720
721 sum = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
722 /*
723 * Look for queue of fragments
724 * of this datagram.
725 */
726 TAILQ_FOREACH(fp, &ipq[sum], ipq_list)
727 if (ip->ip_id == fp->ipq_id &&
728 ip->ip_src.s_addr == fp->ipq_src.s_addr &&
729 ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
730#ifdef MAC
731 mac_fragment_match(m, fp) &&
732#endif
733 ip->ip_p == fp->ipq_p)
734 goto found;
735
736 fp = 0;
737
738 /* check if there's a place for the new queue */
739 if (nipq > maxnipq) {
740 /*
741 * drop something from the tail of the current queue
742 * before proceeding further
743 */
744 struct ipq *q = TAILQ_LAST(&ipq[sum], ipqhead);
745 if (q == NULL) { /* gak */
746 for (i = 0; i < IPREASS_NHASH; i++) {
747 struct ipq *r = TAILQ_LAST(&ipq[i], ipqhead);
748 if (r) {
749 ip_freef(&ipq[i], r);
750 break;
751 }
752 }
753 } else
754 ip_freef(&ipq[sum], q);
755 }
756found:
757 /*
758 * Adjust ip_len to not reflect header,
759 * convert offset of this to bytes.
760 */
761 ip->ip_len -= hlen;
762 if (ip->ip_off & IP_MF) {
763 /*
764 * Make sure that fragments have a data length
765 * that's a non-zero multiple of 8 bytes.
766 */
767 if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
768 ipstat.ips_toosmall++; /* XXX */
769 goto bad;
770 }
771 m->m_flags |= M_FRAG;
772 } else
773 m->m_flags &= ~M_FRAG;
774 ip->ip_off <<= 3;
775
776 /*
777 * Attempt reassembly; if it succeeds, proceed.
778 * ip_reass() will return a different mbuf, and update
779 * the divert info in divert_info and args.divert_rule.
780 */
781 ipstat.ips_fragments++;
782 m->m_pkthdr.header = ip;
783 m = ip_reass(m,
784 &ipq[sum], fp, &divert_info, &args.divert_rule);
785 if (m == 0)
786 return;
787 ipstat.ips_reassembled++;
788 ip = mtod(m, struct ip *);
789 /* Get the header length of the reassembled packet */
792 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
790 hlen = ip->ip_hl << 2;
793#ifdef IPDIVERT
794 /* Restore original checksum before diverting packet */
795 if (divert_info != 0) {
796 ip->ip_len += hlen;
797 ip->ip_len = htons(ip->ip_len);
798 ip->ip_off = htons(ip->ip_off);
799 ip->ip_sum = 0;
800 if (hlen == sizeof(struct ip))
801 ip->ip_sum = in_cksum_hdr(ip);
802 else
803 ip->ip_sum = in_cksum(m, hlen);
804 ip->ip_off = ntohs(ip->ip_off);
805 ip->ip_len = ntohs(ip->ip_len);
806 ip->ip_len -= hlen;
807 }
808#endif
809 } else
810 ip->ip_len -= hlen;
811
812#ifdef IPDIVERT
813 /*
814 * Divert or tee packet to the divert protocol if required.
815 */
816 if (divert_info != 0) {
817 struct mbuf *clone = NULL;
818
819 /* Clone packet if we're doing a 'tee' */
820 if ((divert_info & IP_FW_PORT_TEE_FLAG) != 0)
821 clone = m_dup(m, M_DONTWAIT);
822
823 /* Restore packet header fields to original values */
824 ip->ip_len += hlen;
825 ip->ip_len = htons(ip->ip_len);
826 ip->ip_off = htons(ip->ip_off);
827
828 /* Deliver packet to divert input routine */
829 divert_packet(m, 1, divert_info & 0xffff, args.divert_rule);
830 ipstat.ips_delivered++;
831
832 /* If 'tee', continue with original packet */
833 if (clone == NULL)
834 return;
835 m = clone;
836 ip = mtod(m, struct ip *);
837 ip->ip_len += hlen;
838 /*
839 * Jump backwards to complete processing of the
840 * packet. But first clear divert_info to avoid
841 * entering this block again.
842 * We do not need to clear args.divert_rule
843 * or args.next_hop as they will not be used.
844 */
845 divert_info = 0;
846 goto pass;
847 }
848#endif
849
850#ifdef IPSEC
851 /*
852 * enforce IPsec policy checking if we are seeing last header.
853 * note that we do not visit this with protocols with pcb layer
854 * code - like udp/tcp/raw ip.
855 */
856 if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0 &&
857 ipsec4_in_reject(m, NULL)) {
858 ipsecstat.in_polvio++;
859 goto bad;
860 }
861#endif
862#if FAST_IPSEC
863 /*
864 * enforce IPsec policy checking if we are seeing last header.
865 * note that we do not visit this with protocols with pcb layer
866 * code - like udp/tcp/raw ip.
867 */
868 if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) {
869 /*
870 * Check if the packet has already had IPsec processing
871 * done. If so, then just pass it along. This tag gets
872 * set during AH, ESP, etc. input handling, before the
873 * packet is returned to the ip input queue for delivery.
874 */
875 mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
876 s = splnet();
877 if (mtag != NULL) {
878 tdbi = (struct tdb_ident *)(mtag + 1);
879 sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
880 } else {
881 sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
882 IP_FORWARDING, &error);
883 }
884 if (sp != NULL) {
885 /*
886 * Check security policy against packet attributes.
887 */
888 error = ipsec_in_reject(sp, m);
889 KEY_FREESP(&sp);
890 } else {
891 /* XXX error stat??? */
892 error = EINVAL;
893DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
894 goto bad;
895 }
896 splx(s);
897 if (error)
898 goto bad;
899 }
900#endif /* FAST_IPSEC */
901
902 /*
903 * Switch out to protocol's input routine.
904 */
905 ipstat.ips_delivered++;
906 if (args.next_hop && ip->ip_p == IPPROTO_TCP) {
907 /* TCP needs IPFORWARD info if available */
908 struct m_hdr tag;
909
910 tag.mh_type = MT_TAG;
911 tag.mh_flags = PACKET_TAG_IPFORWARD;
912 tag.mh_data = (caddr_t)args.next_hop;
913 tag.mh_next = m;
914
915 (*inetsw[ip_protox[ip->ip_p]].pr_input)(
916 (struct mbuf *)&tag, hlen);
917 } else
918 (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
919 return;
920bad:
921 m_freem(m);
922}
923
924/*
925 * IP software interrupt routine - to go away sometime soon
926 */
927static void
928ipintr(void)
929{
930 struct mbuf *m;
931
932 while (1) {
933 IF_DEQUEUE(&ipintrq, m);
934 if (m == 0)
935 return;
936 ip_input(m);
937 }
938}
939
940/*
941 * Take incoming datagram fragment and try to reassemble it into
942 * whole datagram. If a chain for reassembly of this datagram already
943 * exists, then it is given as fp; otherwise have to make a chain.
944 *
945 * When IPDIVERT enabled, keep additional state with each packet that
946 * tells us if we need to divert or tee the packet we're building.
947 * In particular, *divinfo includes the port and TEE flag,
948 * *divert_rule is the number of the matching rule.
949 */
950
951static struct mbuf *
952ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp,
953 u_int32_t *divinfo, u_int16_t *divert_rule)
954{
955 struct ip *ip = mtod(m, struct ip *);
956 register struct mbuf *p, *q, *nq;
957 struct mbuf *t;
791#ifdef IPDIVERT
792 /* Restore original checksum before diverting packet */
793 if (divert_info != 0) {
794 ip->ip_len += hlen;
795 ip->ip_len = htons(ip->ip_len);
796 ip->ip_off = htons(ip->ip_off);
797 ip->ip_sum = 0;
798 if (hlen == sizeof(struct ip))
799 ip->ip_sum = in_cksum_hdr(ip);
800 else
801 ip->ip_sum = in_cksum(m, hlen);
802 ip->ip_off = ntohs(ip->ip_off);
803 ip->ip_len = ntohs(ip->ip_len);
804 ip->ip_len -= hlen;
805 }
806#endif
807 } else
808 ip->ip_len -= hlen;
809
810#ifdef IPDIVERT
811 /*
812 * Divert or tee packet to the divert protocol if required.
813 */
814 if (divert_info != 0) {
815 struct mbuf *clone = NULL;
816
817 /* Clone packet if we're doing a 'tee' */
818 if ((divert_info & IP_FW_PORT_TEE_FLAG) != 0)
819 clone = m_dup(m, M_DONTWAIT);
820
821 /* Restore packet header fields to original values */
822 ip->ip_len += hlen;
823 ip->ip_len = htons(ip->ip_len);
824 ip->ip_off = htons(ip->ip_off);
825
826 /* Deliver packet to divert input routine */
827 divert_packet(m, 1, divert_info & 0xffff, args.divert_rule);
828 ipstat.ips_delivered++;
829
830 /* If 'tee', continue with original packet */
831 if (clone == NULL)
832 return;
833 m = clone;
834 ip = mtod(m, struct ip *);
835 ip->ip_len += hlen;
836 /*
837 * Jump backwards to complete processing of the
838 * packet. But first clear divert_info to avoid
839 * entering this block again.
840 * We do not need to clear args.divert_rule
841 * or args.next_hop as they will not be used.
842 */
843 divert_info = 0;
844 goto pass;
845 }
846#endif
847
848#ifdef IPSEC
849 /*
850 * enforce IPsec policy checking if we are seeing last header.
851 * note that we do not visit this with protocols with pcb layer
852 * code - like udp/tcp/raw ip.
853 */
854 if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0 &&
855 ipsec4_in_reject(m, NULL)) {
856 ipsecstat.in_polvio++;
857 goto bad;
858 }
859#endif
860#if FAST_IPSEC
861 /*
862 * enforce IPsec policy checking if we are seeing last header.
863 * note that we do not visit this with protocols with pcb layer
864 * code - like udp/tcp/raw ip.
865 */
866 if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) {
867 /*
868 * Check if the packet has already had IPsec processing
869 * done. If so, then just pass it along. This tag gets
870 * set during AH, ESP, etc. input handling, before the
871 * packet is returned to the ip input queue for delivery.
872 */
873 mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
874 s = splnet();
875 if (mtag != NULL) {
876 tdbi = (struct tdb_ident *)(mtag + 1);
877 sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
878 } else {
879 sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
880 IP_FORWARDING, &error);
881 }
882 if (sp != NULL) {
883 /*
884 * Check security policy against packet attributes.
885 */
886 error = ipsec_in_reject(sp, m);
887 KEY_FREESP(&sp);
888 } else {
889 /* XXX error stat??? */
890 error = EINVAL;
891DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
892 goto bad;
893 }
894 splx(s);
895 if (error)
896 goto bad;
897 }
898#endif /* FAST_IPSEC */
899
900 /*
901 * Switch out to protocol's input routine.
902 */
903 ipstat.ips_delivered++;
904 if (args.next_hop && ip->ip_p == IPPROTO_TCP) {
905 /* TCP needs IPFORWARD info if available */
906 struct m_hdr tag;
907
908 tag.mh_type = MT_TAG;
909 tag.mh_flags = PACKET_TAG_IPFORWARD;
910 tag.mh_data = (caddr_t)args.next_hop;
911 tag.mh_next = m;
912
913 (*inetsw[ip_protox[ip->ip_p]].pr_input)(
914 (struct mbuf *)&tag, hlen);
915 } else
916 (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
917 return;
918bad:
919 m_freem(m);
920}
921
922/*
923 * IP software interrupt routine - to go away sometime soon
924 */
925static void
926ipintr(void)
927{
928 struct mbuf *m;
929
930 while (1) {
931 IF_DEQUEUE(&ipintrq, m);
932 if (m == 0)
933 return;
934 ip_input(m);
935 }
936}
937
938/*
939 * Take incoming datagram fragment and try to reassemble it into
940 * whole datagram. If a chain for reassembly of this datagram already
941 * exists, then it is given as fp; otherwise have to make a chain.
942 *
943 * When IPDIVERT enabled, keep additional state with each packet that
944 * tells us if we need to divert or tee the packet we're building.
945 * In particular, *divinfo includes the port and TEE flag,
946 * *divert_rule is the number of the matching rule.
947 */
948
949static struct mbuf *
950ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp,
951 u_int32_t *divinfo, u_int16_t *divert_rule)
952{
953 struct ip *ip = mtod(m, struct ip *);
954 register struct mbuf *p, *q, *nq;
955 struct mbuf *t;
958 int hlen = IP_VHL_HL(ip->ip_vhl) << 2;
956 int hlen = ip->ip_hl << 2;
959 int i, next;
960
961 /*
962 * Presence of header sizes in mbufs
963 * would confuse code below.
964 */
965 m->m_data += hlen;
966 m->m_len -= hlen;
967
968 /*
969 * If first fragment to arrive, create a reassembly queue.
970 */
971 if (fp == 0) {
972 /*
973 * Enforce upper bound on number of fragmented packets
974 * for which we attempt reassembly;
975 * If maxfrag is 0, never accept fragments.
976 * If maxfrag is -1, accept all fragments without limitation.
977 */
978 if ((ip_maxfragpackets >= 0) && (ip_nfragpackets >= ip_maxfragpackets))
979 goto dropfrag;
980 ip_nfragpackets++;
981 if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
982 goto dropfrag;
983 fp = mtod(t, struct ipq *);
984#ifdef MAC
985 mac_init_ipq(fp);
986 mac_create_ipq(m, fp);
987#endif
988 TAILQ_INSERT_HEAD(head, fp, ipq_list);
989 nipq++;
990 fp->ipq_ttl = IPFRAGTTL;
991 fp->ipq_p = ip->ip_p;
992 fp->ipq_id = ip->ip_id;
993 fp->ipq_src = ip->ip_src;
994 fp->ipq_dst = ip->ip_dst;
995 fp->ipq_frags = m;
996 m->m_nextpkt = NULL;
997#ifdef IPDIVERT
998 fp->ipq_div_info = 0;
999 fp->ipq_div_cookie = 0;
1000#endif
1001 goto inserted;
1002 } else {
1003#ifdef MAC
1004 mac_update_ipq(m, fp);
1005#endif
1006 }
1007
1008#define GETIP(m) ((struct ip*)((m)->m_pkthdr.header))
1009
1010 /*
1011 * Find a segment which begins after this one does.
1012 */
1013 for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
1014 if (GETIP(q)->ip_off > ip->ip_off)
1015 break;
1016
1017 /*
1018 * If there is a preceding segment, it may provide some of
1019 * our data already. If so, drop the data from the incoming
1020 * segment. If it provides all of our data, drop us, otherwise
1021 * stick new segment in the proper place.
1022 *
1023 * If some of the data is dropped from the preceding
1024 * segment, then its checksum is invalidated.
1025 */
1026 if (p) {
1027 i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off;
1028 if (i > 0) {
1029 if (i >= ip->ip_len)
1030 goto dropfrag;
1031 m_adj(m, i);
1032 m->m_pkthdr.csum_flags = 0;
1033 ip->ip_off += i;
1034 ip->ip_len -= i;
1035 }
1036 m->m_nextpkt = p->m_nextpkt;
1037 p->m_nextpkt = m;
1038 } else {
1039 m->m_nextpkt = fp->ipq_frags;
1040 fp->ipq_frags = m;
1041 }
1042
1043 /*
1044 * While we overlap succeeding segments trim them or,
1045 * if they are completely covered, dequeue them.
1046 */
1047 for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off;
1048 q = nq) {
1049 i = (ip->ip_off + ip->ip_len) -
1050 GETIP(q)->ip_off;
1051 if (i < GETIP(q)->ip_len) {
1052 GETIP(q)->ip_len -= i;
1053 GETIP(q)->ip_off += i;
1054 m_adj(q, i);
1055 q->m_pkthdr.csum_flags = 0;
1056 break;
1057 }
1058 nq = q->m_nextpkt;
1059 m->m_nextpkt = nq;
1060 m_freem(q);
1061 }
1062
1063inserted:
1064
1065#ifdef IPDIVERT
1066 /*
1067 * Transfer firewall instructions to the fragment structure.
1068 * Only trust info in the fragment at offset 0.
1069 */
1070 if (ip->ip_off == 0) {
1071 fp->ipq_div_info = *divinfo;
1072 fp->ipq_div_cookie = *divert_rule;
1073 }
1074 *divinfo = 0;
1075 *divert_rule = 0;
1076#endif
1077
1078 /*
1079 * Check for complete reassembly.
1080 */
1081 next = 0;
1082 for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
1083 if (GETIP(q)->ip_off != next)
1084 return (0);
1085 next += GETIP(q)->ip_len;
1086 }
1087 /* Make sure the last packet didn't have the IP_MF flag */
1088 if (p->m_flags & M_FRAG)
1089 return (0);
1090
1091 /*
1092 * Reassembly is complete. Make sure the packet is a sane size.
1093 */
1094 q = fp->ipq_frags;
1095 ip = GETIP(q);
957 int i, next;
958
959 /*
960 * Presence of header sizes in mbufs
961 * would confuse code below.
962 */
963 m->m_data += hlen;
964 m->m_len -= hlen;
965
966 /*
967 * If first fragment to arrive, create a reassembly queue.
968 */
969 if (fp == 0) {
970 /*
971 * Enforce upper bound on number of fragmented packets
972 * for which we attempt reassembly;
973 * If maxfrag is 0, never accept fragments.
974 * If maxfrag is -1, accept all fragments without limitation.
975 */
976 if ((ip_maxfragpackets >= 0) && (ip_nfragpackets >= ip_maxfragpackets))
977 goto dropfrag;
978 ip_nfragpackets++;
979 if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
980 goto dropfrag;
981 fp = mtod(t, struct ipq *);
982#ifdef MAC
983 mac_init_ipq(fp);
984 mac_create_ipq(m, fp);
985#endif
986 TAILQ_INSERT_HEAD(head, fp, ipq_list);
987 nipq++;
988 fp->ipq_ttl = IPFRAGTTL;
989 fp->ipq_p = ip->ip_p;
990 fp->ipq_id = ip->ip_id;
991 fp->ipq_src = ip->ip_src;
992 fp->ipq_dst = ip->ip_dst;
993 fp->ipq_frags = m;
994 m->m_nextpkt = NULL;
995#ifdef IPDIVERT
996 fp->ipq_div_info = 0;
997 fp->ipq_div_cookie = 0;
998#endif
999 goto inserted;
1000 } else {
1001#ifdef MAC
1002 mac_update_ipq(m, fp);
1003#endif
1004 }
1005
1006#define GETIP(m) ((struct ip*)((m)->m_pkthdr.header))
1007
1008 /*
1009 * Find a segment which begins after this one does.
1010 */
1011 for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
1012 if (GETIP(q)->ip_off > ip->ip_off)
1013 break;
1014
1015 /*
1016 * If there is a preceding segment, it may provide some of
1017 * our data already. If so, drop the data from the incoming
1018 * segment. If it provides all of our data, drop us, otherwise
1019 * stick new segment in the proper place.
1020 *
1021 * If some of the data is dropped from the preceding
1022 * segment, then its checksum is invalidated.
1023 */
1024 if (p) {
1025 i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off;
1026 if (i > 0) {
1027 if (i >= ip->ip_len)
1028 goto dropfrag;
1029 m_adj(m, i);
1030 m->m_pkthdr.csum_flags = 0;
1031 ip->ip_off += i;
1032 ip->ip_len -= i;
1033 }
1034 m->m_nextpkt = p->m_nextpkt;
1035 p->m_nextpkt = m;
1036 } else {
1037 m->m_nextpkt = fp->ipq_frags;
1038 fp->ipq_frags = m;
1039 }
1040
1041 /*
1042 * While we overlap succeeding segments trim them or,
1043 * if they are completely covered, dequeue them.
1044 */
1045 for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off;
1046 q = nq) {
1047 i = (ip->ip_off + ip->ip_len) -
1048 GETIP(q)->ip_off;
1049 if (i < GETIP(q)->ip_len) {
1050 GETIP(q)->ip_len -= i;
1051 GETIP(q)->ip_off += i;
1052 m_adj(q, i);
1053 q->m_pkthdr.csum_flags = 0;
1054 break;
1055 }
1056 nq = q->m_nextpkt;
1057 m->m_nextpkt = nq;
1058 m_freem(q);
1059 }
1060
1061inserted:
1062
1063#ifdef IPDIVERT
1064 /*
1065 * Transfer firewall instructions to the fragment structure.
1066 * Only trust info in the fragment at offset 0.
1067 */
1068 if (ip->ip_off == 0) {
1069 fp->ipq_div_info = *divinfo;
1070 fp->ipq_div_cookie = *divert_rule;
1071 }
1072 *divinfo = 0;
1073 *divert_rule = 0;
1074#endif
1075
1076 /*
1077 * Check for complete reassembly.
1078 */
1079 next = 0;
1080 for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
1081 if (GETIP(q)->ip_off != next)
1082 return (0);
1083 next += GETIP(q)->ip_len;
1084 }
1085 /* Make sure the last packet didn't have the IP_MF flag */
1086 if (p->m_flags & M_FRAG)
1087 return (0);
1088
1089 /*
1090 * Reassembly is complete. Make sure the packet is a sane size.
1091 */
1092 q = fp->ipq_frags;
1093 ip = GETIP(q);
1096 if (next + (IP_VHL_HL(ip->ip_vhl) << 2) > IP_MAXPACKET) {
1094 if (next + (ip->ip_hl << 2) > IP_MAXPACKET) {
1097 ipstat.ips_toolong++;
1098 ip_freef(head, fp);
1099 return (0);
1100 }
1101
1102 /*
1103 * Concatenate fragments.
1104 */
1105 m = q;
1106 t = m->m_next;
1107 m->m_next = 0;
1108 m_cat(m, t);
1109 nq = q->m_nextpkt;
1110 q->m_nextpkt = 0;
1111 for (q = nq; q != NULL; q = nq) {
1112 nq = q->m_nextpkt;
1113 q->m_nextpkt = NULL;
1114 m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags;
1115 m->m_pkthdr.csum_data += q->m_pkthdr.csum_data;
1116 m_cat(m, q);
1117 }
1118#ifdef MAC
1119 mac_create_datagram_from_ipq(fp, m);
1120 mac_destroy_ipq(fp);
1121#endif
1122
1123#ifdef IPDIVERT
1124 /*
1125 * Extract firewall instructions from the fragment structure.
1126 */
1127 *divinfo = fp->ipq_div_info;
1128 *divert_rule = fp->ipq_div_cookie;
1129#endif
1130
1131 /*
1132 * Create header for new ip packet by
1133 * modifying header of first packet;
1134 * dequeue and discard fragment reassembly header.
1135 * Make header visible.
1136 */
1137 ip->ip_len = next;
1138 ip->ip_src = fp->ipq_src;
1139 ip->ip_dst = fp->ipq_dst;
1140 TAILQ_REMOVE(head, fp, ipq_list);
1141 nipq--;
1142 (void) m_free(dtom(fp));
1143 ip_nfragpackets--;
1095 ipstat.ips_toolong++;
1096 ip_freef(head, fp);
1097 return (0);
1098 }
1099
1100 /*
1101 * Concatenate fragments.
1102 */
1103 m = q;
1104 t = m->m_next;
1105 m->m_next = 0;
1106 m_cat(m, t);
1107 nq = q->m_nextpkt;
1108 q->m_nextpkt = 0;
1109 for (q = nq; q != NULL; q = nq) {
1110 nq = q->m_nextpkt;
1111 q->m_nextpkt = NULL;
1112 m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags;
1113 m->m_pkthdr.csum_data += q->m_pkthdr.csum_data;
1114 m_cat(m, q);
1115 }
1116#ifdef MAC
1117 mac_create_datagram_from_ipq(fp, m);
1118 mac_destroy_ipq(fp);
1119#endif
1120
1121#ifdef IPDIVERT
1122 /*
1123 * Extract firewall instructions from the fragment structure.
1124 */
1125 *divinfo = fp->ipq_div_info;
1126 *divert_rule = fp->ipq_div_cookie;
1127#endif
1128
1129 /*
1130 * Create header for new ip packet by
1131 * modifying header of first packet;
1132 * dequeue and discard fragment reassembly header.
1133 * Make header visible.
1134 */
1135 ip->ip_len = next;
1136 ip->ip_src = fp->ipq_src;
1137 ip->ip_dst = fp->ipq_dst;
1138 TAILQ_REMOVE(head, fp, ipq_list);
1139 nipq--;
1140 (void) m_free(dtom(fp));
1141 ip_nfragpackets--;
1144 m->m_len += (IP_VHL_HL(ip->ip_vhl) << 2);
1145 m->m_data -= (IP_VHL_HL(ip->ip_vhl) << 2);
1142 m->m_len += (ip->ip_hl << 2);
1143 m->m_data -= (ip->ip_hl << 2);
1146 /* some debugging cruft by sklower, below, will go away soon */
1147 if (m->m_flags & M_PKTHDR) /* XXX this should be done elsewhere */
1148 m_fixhdr(m);
1149 return (m);
1150
1151dropfrag:
1152#ifdef IPDIVERT
1153 *divinfo = 0;
1154 *divert_rule = 0;
1155#endif
1156 ipstat.ips_fragdropped++;
1157 m_freem(m);
1158 return (0);
1159
1160#undef GETIP
1161}
1162
1163/*
1164 * Free a fragment reassembly header and all
1165 * associated datagrams.
1166 */
1167static void
1168ip_freef(fhp, fp)
1169 struct ipqhead *fhp;
1170 struct ipq *fp;
1171{
1172 register struct mbuf *q;
1173
1174 while (fp->ipq_frags) {
1175 q = fp->ipq_frags;
1176 fp->ipq_frags = q->m_nextpkt;
1177 m_freem(q);
1178 }
1179 TAILQ_REMOVE(fhp, fp, ipq_list);
1180 (void) m_free(dtom(fp));
1181 ip_nfragpackets--;
1182 nipq--;
1183}
1184
1185/*
1186 * IP timer processing;
1187 * if a timer expires on a reassembly
1188 * queue, discard it.
1189 */
1190void
1191ip_slowtimo()
1192{
1193 register struct ipq *fp;
1194 int s = splnet();
1195 int i;
1196
1197 for (i = 0; i < IPREASS_NHASH; i++) {
1198 for(fp = TAILQ_FIRST(&ipq[i]); fp;) {
1199 struct ipq *fpp;
1200
1201 fpp = fp;
1202 fp = TAILQ_NEXT(fp, ipq_list);
1203 if(--fpp->ipq_ttl == 0) {
1204 ipstat.ips_fragtimeout++;
1205 ip_freef(&ipq[i], fpp);
1206 }
1207 }
1208 }
1209 /*
1210 * If we are over the maximum number of fragments
1211 * (due to the limit being lowered), drain off
1212 * enough to get down to the new limit.
1213 */
1214 for (i = 0; i < IPREASS_NHASH; i++) {
1215 if (ip_maxfragpackets >= 0) {
1216 while (ip_nfragpackets > ip_maxfragpackets &&
1217 !TAILQ_EMPTY(&ipq[i])) {
1218 ipstat.ips_fragdropped++;
1219 ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
1220 }
1221 }
1222 }
1223 ipflow_slowtimo();
1224 splx(s);
1225}
1226
1227/*
1228 * Drain off all datagram fragments.
1229 */
1230void
1231ip_drain()
1232{
1233 int i;
1234
1235 for (i = 0; i < IPREASS_NHASH; i++) {
1236 while(!TAILQ_EMPTY(&ipq[i])) {
1237 ipstat.ips_fragdropped++;
1238 ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
1239 }
1240 }
1241 in_rtqdrain();
1242}
1243
1244/*
1245 * Do option processing on a datagram,
1246 * possibly discarding it if bad options are encountered,
1247 * or forwarding it if source-routed.
1248 * The pass argument is used when operating in the IPSTEALTH
1249 * mode to tell what options to process:
1250 * [LS]SRR (pass 0) or the others (pass 1).
1251 * The reason for as many as two passes is that when doing IPSTEALTH,
1252 * non-routing options should be processed only if the packet is for us.
1253 * Returns 1 if packet has been forwarded/freed,
1254 * 0 if the packet should be processed further.
1255 */
1256static int
1257ip_dooptions(struct mbuf *m, int pass, struct sockaddr_in *next_hop)
1258{
1259 struct ip *ip = mtod(m, struct ip *);
1260 u_char *cp;
1261 struct in_ifaddr *ia;
1262 int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
1263 struct in_addr *sin, dst;
1264 n_time ntime;
1265 struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
1266
1267 dst = ip->ip_dst;
1268 cp = (u_char *)(ip + 1);
1144 /* some debugging cruft by sklower, below, will go away soon */
1145 if (m->m_flags & M_PKTHDR) /* XXX this should be done elsewhere */
1146 m_fixhdr(m);
1147 return (m);
1148
1149dropfrag:
1150#ifdef IPDIVERT
1151 *divinfo = 0;
1152 *divert_rule = 0;
1153#endif
1154 ipstat.ips_fragdropped++;
1155 m_freem(m);
1156 return (0);
1157
1158#undef GETIP
1159}
1160
1161/*
1162 * Free a fragment reassembly header and all
1163 * associated datagrams.
1164 */
1165static void
1166ip_freef(fhp, fp)
1167 struct ipqhead *fhp;
1168 struct ipq *fp;
1169{
1170 register struct mbuf *q;
1171
1172 while (fp->ipq_frags) {
1173 q = fp->ipq_frags;
1174 fp->ipq_frags = q->m_nextpkt;
1175 m_freem(q);
1176 }
1177 TAILQ_REMOVE(fhp, fp, ipq_list);
1178 (void) m_free(dtom(fp));
1179 ip_nfragpackets--;
1180 nipq--;
1181}
1182
1183/*
1184 * IP timer processing;
1185 * if a timer expires on a reassembly
1186 * queue, discard it.
1187 */
1188void
1189ip_slowtimo()
1190{
1191 register struct ipq *fp;
1192 int s = splnet();
1193 int i;
1194
1195 for (i = 0; i < IPREASS_NHASH; i++) {
1196 for(fp = TAILQ_FIRST(&ipq[i]); fp;) {
1197 struct ipq *fpp;
1198
1199 fpp = fp;
1200 fp = TAILQ_NEXT(fp, ipq_list);
1201 if(--fpp->ipq_ttl == 0) {
1202 ipstat.ips_fragtimeout++;
1203 ip_freef(&ipq[i], fpp);
1204 }
1205 }
1206 }
1207 /*
1208 * If we are over the maximum number of fragments
1209 * (due to the limit being lowered), drain off
1210 * enough to get down to the new limit.
1211 */
1212 for (i = 0; i < IPREASS_NHASH; i++) {
1213 if (ip_maxfragpackets >= 0) {
1214 while (ip_nfragpackets > ip_maxfragpackets &&
1215 !TAILQ_EMPTY(&ipq[i])) {
1216 ipstat.ips_fragdropped++;
1217 ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
1218 }
1219 }
1220 }
1221 ipflow_slowtimo();
1222 splx(s);
1223}
1224
1225/*
1226 * Drain off all datagram fragments.
1227 */
1228void
1229ip_drain()
1230{
1231 int i;
1232
1233 for (i = 0; i < IPREASS_NHASH; i++) {
1234 while(!TAILQ_EMPTY(&ipq[i])) {
1235 ipstat.ips_fragdropped++;
1236 ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
1237 }
1238 }
1239 in_rtqdrain();
1240}
1241
1242/*
1243 * Do option processing on a datagram,
1244 * possibly discarding it if bad options are encountered,
1245 * or forwarding it if source-routed.
1246 * The pass argument is used when operating in the IPSTEALTH
1247 * mode to tell what options to process:
1248 * [LS]SRR (pass 0) or the others (pass 1).
1249 * The reason for as many as two passes is that when doing IPSTEALTH,
1250 * non-routing options should be processed only if the packet is for us.
1251 * Returns 1 if packet has been forwarded/freed,
1252 * 0 if the packet should be processed further.
1253 */
1254static int
1255ip_dooptions(struct mbuf *m, int pass, struct sockaddr_in *next_hop)
1256{
1257 struct ip *ip = mtod(m, struct ip *);
1258 u_char *cp;
1259 struct in_ifaddr *ia;
1260 int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
1261 struct in_addr *sin, dst;
1262 n_time ntime;
1263 struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
1264
1265 dst = ip->ip_dst;
1266 cp = (u_char *)(ip + 1);
1269 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
1267 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
1270 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1271 opt = cp[IPOPT_OPTVAL];
1272 if (opt == IPOPT_EOL)
1273 break;
1274 if (opt == IPOPT_NOP)
1275 optlen = 1;
1276 else {
1277 if (cnt < IPOPT_OLEN + sizeof(*cp)) {
1278 code = &cp[IPOPT_OLEN] - (u_char *)ip;
1279 goto bad;
1280 }
1281 optlen = cp[IPOPT_OLEN];
1282 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
1283 code = &cp[IPOPT_OLEN] - (u_char *)ip;
1284 goto bad;
1285 }
1286 }
1287 switch (opt) {
1288
1289 default:
1290 break;
1291
1292 /*
1293 * Source routing with record.
1294 * Find interface with current destination address.
1295 * If none on this machine then drop if strictly routed,
1296 * or do nothing if loosely routed.
1297 * Record interface address and bring up next address
1298 * component. If strictly routed make sure next
1299 * address is on directly accessible net.
1300 */
1301 case IPOPT_LSRR:
1302 case IPOPT_SSRR:
1303#ifdef IPSTEALTH
1304 if (ipstealth && pass > 0)
1305 break;
1306#endif
1307 if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
1308 code = &cp[IPOPT_OLEN] - (u_char *)ip;
1309 goto bad;
1310 }
1311 if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1312 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1313 goto bad;
1314 }
1315 ipaddr.sin_addr = ip->ip_dst;
1316 ia = (struct in_ifaddr *)
1317 ifa_ifwithaddr((struct sockaddr *)&ipaddr);
1318 if (ia == 0) {
1319 if (opt == IPOPT_SSRR) {
1320 type = ICMP_UNREACH;
1321 code = ICMP_UNREACH_SRCFAIL;
1322 goto bad;
1323 }
1324 if (!ip_dosourceroute)
1325 goto nosourcerouting;
1326 /*
1327 * Loose routing, and not at next destination
1328 * yet; nothing to do except forward.
1329 */
1330 break;
1331 }
1332 off--; /* 0 origin */
1333 if (off > optlen - (int)sizeof(struct in_addr)) {
1334 /*
1335 * End of source route. Should be for us.
1336 */
1337 if (!ip_acceptsourceroute)
1338 goto nosourcerouting;
1339 save_rte(cp, ip->ip_src);
1340 break;
1341 }
1342#ifdef IPSTEALTH
1343 if (ipstealth)
1344 goto dropit;
1345#endif
1346 if (!ip_dosourceroute) {
1347 if (ipforwarding) {
1348 char buf[16]; /* aaa.bbb.ccc.ddd\0 */
1349 /*
1350 * Acting as a router, so generate ICMP
1351 */
1352nosourcerouting:
1353 strcpy(buf, inet_ntoa(ip->ip_dst));
1354 log(LOG_WARNING,
1355 "attempted source route from %s to %s\n",
1356 inet_ntoa(ip->ip_src), buf);
1357 type = ICMP_UNREACH;
1358 code = ICMP_UNREACH_SRCFAIL;
1359 goto bad;
1360 } else {
1361 /*
1362 * Not acting as a router, so silently drop.
1363 */
1364#ifdef IPSTEALTH
1365dropit:
1366#endif
1367 ipstat.ips_cantforward++;
1368 m_freem(m);
1369 return (1);
1370 }
1371 }
1372
1373 /*
1374 * locate outgoing interface
1375 */
1376 (void)memcpy(&ipaddr.sin_addr, cp + off,
1377 sizeof(ipaddr.sin_addr));
1378
1379 if (opt == IPOPT_SSRR) {
1380#define INA struct in_ifaddr *
1381#define SA struct sockaddr *
1382 if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
1383 ia = (INA)ifa_ifwithnet((SA)&ipaddr);
1384 } else
1385 ia = ip_rtaddr(ipaddr.sin_addr, &ipforward_rt);
1386 if (ia == 0) {
1387 type = ICMP_UNREACH;
1388 code = ICMP_UNREACH_SRCFAIL;
1389 goto bad;
1390 }
1391 ip->ip_dst = ipaddr.sin_addr;
1392 (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
1393 sizeof(struct in_addr));
1394 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1395 /*
1396 * Let ip_intr's mcast routing check handle mcast pkts
1397 */
1398 forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
1399 break;
1400
1401 case IPOPT_RR:
1402#ifdef IPSTEALTH
1403 if (ipstealth && pass == 0)
1404 break;
1405#endif
1406 if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
1407 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1408 goto bad;
1409 }
1410 if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1411 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1412 goto bad;
1413 }
1414 /*
1415 * If no space remains, ignore.
1416 */
1417 off--; /* 0 origin */
1418 if (off > optlen - (int)sizeof(struct in_addr))
1419 break;
1420 (void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
1421 sizeof(ipaddr.sin_addr));
1422 /*
1423 * locate outgoing interface; if we're the destination,
1424 * use the incoming interface (should be same).
1425 */
1426 if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
1427 (ia = ip_rtaddr(ipaddr.sin_addr,
1428 &ipforward_rt)) == 0) {
1429 type = ICMP_UNREACH;
1430 code = ICMP_UNREACH_HOST;
1431 goto bad;
1432 }
1433 (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
1434 sizeof(struct in_addr));
1435 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1436 break;
1437
1438 case IPOPT_TS:
1439#ifdef IPSTEALTH
1440 if (ipstealth && pass == 0)
1441 break;
1442#endif
1443 code = cp - (u_char *)ip;
1444 if (optlen < 4 || optlen > 40) {
1445 code = &cp[IPOPT_OLEN] - (u_char *)ip;
1446 goto bad;
1447 }
1448 if ((off = cp[IPOPT_OFFSET]) < 5) {
1449 code = &cp[IPOPT_OLEN] - (u_char *)ip;
1450 goto bad;
1451 }
1452 if (off > optlen - (int)sizeof(int32_t)) {
1453 cp[IPOPT_OFFSET + 1] += (1 << 4);
1454 if ((cp[IPOPT_OFFSET + 1] & 0xf0) == 0) {
1455 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1456 goto bad;
1457 }
1458 break;
1459 }
1460 off--; /* 0 origin */
1461 sin = (struct in_addr *)(cp + off);
1462 switch (cp[IPOPT_OFFSET + 1] & 0x0f) {
1463
1464 case IPOPT_TS_TSONLY:
1465 break;
1466
1467 case IPOPT_TS_TSANDADDR:
1468 if (off + sizeof(n_time) +
1469 sizeof(struct in_addr) > optlen) {
1470 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1471 goto bad;
1472 }
1473 ipaddr.sin_addr = dst;
1474 ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
1475 m->m_pkthdr.rcvif);
1476 if (ia == 0)
1477 continue;
1478 (void)memcpy(sin, &IA_SIN(ia)->sin_addr,
1479 sizeof(struct in_addr));
1480 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1481 off += sizeof(struct in_addr);
1482 break;
1483
1484 case IPOPT_TS_PRESPEC:
1485 if (off + sizeof(n_time) +
1486 sizeof(struct in_addr) > optlen) {
1487 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1488 goto bad;
1489 }
1490 (void)memcpy(&ipaddr.sin_addr, sin,
1491 sizeof(struct in_addr));
1492 if (ifa_ifwithaddr((SA)&ipaddr) == 0)
1493 continue;
1494 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1495 off += sizeof(struct in_addr);
1496 break;
1497
1498 default:
1499 code = &cp[IPOPT_OFFSET + 1] - (u_char *)ip;
1500 goto bad;
1501 }
1502 ntime = iptime();
1503 (void)memcpy(cp + off, &ntime, sizeof(n_time));
1504 cp[IPOPT_OFFSET] += sizeof(n_time);
1505 }
1506 }
1507 if (forward && ipforwarding) {
1508 ip_forward(m, 1, next_hop);
1509 return (1);
1510 }
1511 return (0);
1512bad:
1513 icmp_error(m, type, code, 0, 0);
1514 ipstat.ips_badoptions++;
1515 return (1);
1516}
1517
1518/*
1519 * Given address of next destination (final or next hop),
1520 * return internet address info of interface to be used to get there.
1521 */
1522struct in_ifaddr *
1523ip_rtaddr(dst, rt)
1524 struct in_addr dst;
1525 struct route *rt;
1526{
1527 register struct sockaddr_in *sin;
1528
1529 sin = (struct sockaddr_in *)&rt->ro_dst;
1530
1531 if (rt->ro_rt == 0 ||
1532 !(rt->ro_rt->rt_flags & RTF_UP) ||
1533 dst.s_addr != sin->sin_addr.s_addr) {
1534 if (rt->ro_rt) {
1535 RTFREE(rt->ro_rt);
1536 rt->ro_rt = 0;
1537 }
1538 sin->sin_family = AF_INET;
1539 sin->sin_len = sizeof(*sin);
1540 sin->sin_addr = dst;
1541
1542 rtalloc_ign(rt, RTF_PRCLONING);
1543 }
1544 if (rt->ro_rt == 0)
1545 return ((struct in_ifaddr *)0);
1546 return (ifatoia(rt->ro_rt->rt_ifa));
1547}
1548
1549/*
1550 * Save incoming source route for use in replies,
1551 * to be picked up later by ip_srcroute if the receiver is interested.
1552 */
1553static void
1554save_rte(option, dst)
1555 u_char *option;
1556 struct in_addr dst;
1557{
1558 unsigned olen;
1559
1560 olen = option[IPOPT_OLEN];
1561#ifdef DIAGNOSTIC
1562 if (ipprintfs)
1563 printf("save_rte: olen %d\n", olen);
1564#endif
1565 if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
1566 return;
1567 bcopy(option, ip_srcrt.srcopt, olen);
1568 ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
1569 ip_srcrt.dst = dst;
1570}
1571
1572/*
1573 * Retrieve incoming source route for use in replies,
1574 * in the same form used by setsockopt.
1575 * The first hop is placed before the options, will be removed later.
1576 */
1577struct mbuf *
1578ip_srcroute()
1579{
1580 register struct in_addr *p, *q;
1581 register struct mbuf *m;
1582
1583 if (ip_nhops == 0)
1584 return ((struct mbuf *)0);
1585 m = m_get(M_DONTWAIT, MT_HEADER);
1586 if (m == 0)
1587 return ((struct mbuf *)0);
1588
1589#define OPTSIZ (sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
1590
1591 /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
1592 m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
1593 OPTSIZ;
1594#ifdef DIAGNOSTIC
1595 if (ipprintfs)
1596 printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
1597#endif
1598
1599 /*
1600 * First save first hop for return route
1601 */
1602 p = &ip_srcrt.route[ip_nhops - 1];
1603 *(mtod(m, struct in_addr *)) = *p--;
1604#ifdef DIAGNOSTIC
1605 if (ipprintfs)
1606 printf(" hops %lx", (u_long)ntohl(mtod(m, struct in_addr *)->s_addr));
1607#endif
1608
1609 /*
1610 * Copy option fields and padding (nop) to mbuf.
1611 */
1612 ip_srcrt.nop = IPOPT_NOP;
1613 ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
1614 (void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
1615 &ip_srcrt.nop, OPTSIZ);
1616 q = (struct in_addr *)(mtod(m, caddr_t) +
1617 sizeof(struct in_addr) + OPTSIZ);
1618#undef OPTSIZ
1619 /*
1620 * Record return path as an IP source route,
1621 * reversing the path (pointers are now aligned).
1622 */
1623 while (p >= ip_srcrt.route) {
1624#ifdef DIAGNOSTIC
1625 if (ipprintfs)
1626 printf(" %lx", (u_long)ntohl(q->s_addr));
1627#endif
1628 *q++ = *p--;
1629 }
1630 /*
1631 * Last hop goes to final destination.
1632 */
1633 *q = ip_srcrt.dst;
1634#ifdef DIAGNOSTIC
1635 if (ipprintfs)
1636 printf(" %lx\n", (u_long)ntohl(q->s_addr));
1637#endif
1638 return (m);
1639}
1640
1641/*
1642 * Strip out IP options, at higher
1643 * level protocol in the kernel.
1644 * Second argument is buffer to which options
1645 * will be moved, and return value is their length.
1646 * XXX should be deleted; last arg currently ignored.
1647 */
1648void
1649ip_stripoptions(m, mopt)
1650 register struct mbuf *m;
1651 struct mbuf *mopt;
1652{
1653 register int i;
1654 struct ip *ip = mtod(m, struct ip *);
1655 register caddr_t opts;
1656 int olen;
1657
1268 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1269 opt = cp[IPOPT_OPTVAL];
1270 if (opt == IPOPT_EOL)
1271 break;
1272 if (opt == IPOPT_NOP)
1273 optlen = 1;
1274 else {
1275 if (cnt < IPOPT_OLEN + sizeof(*cp)) {
1276 code = &cp[IPOPT_OLEN] - (u_char *)ip;
1277 goto bad;
1278 }
1279 optlen = cp[IPOPT_OLEN];
1280 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
1281 code = &cp[IPOPT_OLEN] - (u_char *)ip;
1282 goto bad;
1283 }
1284 }
1285 switch (opt) {
1286
1287 default:
1288 break;
1289
1290 /*
1291 * Source routing with record.
1292 * Find interface with current destination address.
1293 * If none on this machine then drop if strictly routed,
1294 * or do nothing if loosely routed.
1295 * Record interface address and bring up next address
1296 * component. If strictly routed make sure next
1297 * address is on directly accessible net.
1298 */
1299 case IPOPT_LSRR:
1300 case IPOPT_SSRR:
1301#ifdef IPSTEALTH
1302 if (ipstealth && pass > 0)
1303 break;
1304#endif
1305 if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
1306 code = &cp[IPOPT_OLEN] - (u_char *)ip;
1307 goto bad;
1308 }
1309 if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1310 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1311 goto bad;
1312 }
1313 ipaddr.sin_addr = ip->ip_dst;
1314 ia = (struct in_ifaddr *)
1315 ifa_ifwithaddr((struct sockaddr *)&ipaddr);
1316 if (ia == 0) {
1317 if (opt == IPOPT_SSRR) {
1318 type = ICMP_UNREACH;
1319 code = ICMP_UNREACH_SRCFAIL;
1320 goto bad;
1321 }
1322 if (!ip_dosourceroute)
1323 goto nosourcerouting;
1324 /*
1325 * Loose routing, and not at next destination
1326 * yet; nothing to do except forward.
1327 */
1328 break;
1329 }
1330 off--; /* 0 origin */
1331 if (off > optlen - (int)sizeof(struct in_addr)) {
1332 /*
1333 * End of source route. Should be for us.
1334 */
1335 if (!ip_acceptsourceroute)
1336 goto nosourcerouting;
1337 save_rte(cp, ip->ip_src);
1338 break;
1339 }
1340#ifdef IPSTEALTH
1341 if (ipstealth)
1342 goto dropit;
1343#endif
1344 if (!ip_dosourceroute) {
1345 if (ipforwarding) {
1346 char buf[16]; /* aaa.bbb.ccc.ddd\0 */
1347 /*
1348 * Acting as a router, so generate ICMP
1349 */
1350nosourcerouting:
1351 strcpy(buf, inet_ntoa(ip->ip_dst));
1352 log(LOG_WARNING,
1353 "attempted source route from %s to %s\n",
1354 inet_ntoa(ip->ip_src), buf);
1355 type = ICMP_UNREACH;
1356 code = ICMP_UNREACH_SRCFAIL;
1357 goto bad;
1358 } else {
1359 /*
1360 * Not acting as a router, so silently drop.
1361 */
1362#ifdef IPSTEALTH
1363dropit:
1364#endif
1365 ipstat.ips_cantforward++;
1366 m_freem(m);
1367 return (1);
1368 }
1369 }
1370
1371 /*
1372 * locate outgoing interface
1373 */
1374 (void)memcpy(&ipaddr.sin_addr, cp + off,
1375 sizeof(ipaddr.sin_addr));
1376
1377 if (opt == IPOPT_SSRR) {
1378#define INA struct in_ifaddr *
1379#define SA struct sockaddr *
1380 if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
1381 ia = (INA)ifa_ifwithnet((SA)&ipaddr);
1382 } else
1383 ia = ip_rtaddr(ipaddr.sin_addr, &ipforward_rt);
1384 if (ia == 0) {
1385 type = ICMP_UNREACH;
1386 code = ICMP_UNREACH_SRCFAIL;
1387 goto bad;
1388 }
1389 ip->ip_dst = ipaddr.sin_addr;
1390 (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
1391 sizeof(struct in_addr));
1392 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1393 /*
1394 * Let ip_intr's mcast routing check handle mcast pkts
1395 */
1396 forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
1397 break;
1398
1399 case IPOPT_RR:
1400#ifdef IPSTEALTH
1401 if (ipstealth && pass == 0)
1402 break;
1403#endif
1404 if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
1405 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1406 goto bad;
1407 }
1408 if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1409 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1410 goto bad;
1411 }
1412 /*
1413 * If no space remains, ignore.
1414 */
1415 off--; /* 0 origin */
1416 if (off > optlen - (int)sizeof(struct in_addr))
1417 break;
1418 (void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
1419 sizeof(ipaddr.sin_addr));
1420 /*
1421 * locate outgoing interface; if we're the destination,
1422 * use the incoming interface (should be same).
1423 */
1424 if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
1425 (ia = ip_rtaddr(ipaddr.sin_addr,
1426 &ipforward_rt)) == 0) {
1427 type = ICMP_UNREACH;
1428 code = ICMP_UNREACH_HOST;
1429 goto bad;
1430 }
1431 (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
1432 sizeof(struct in_addr));
1433 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1434 break;
1435
1436 case IPOPT_TS:
1437#ifdef IPSTEALTH
1438 if (ipstealth && pass == 0)
1439 break;
1440#endif
1441 code = cp - (u_char *)ip;
1442 if (optlen < 4 || optlen > 40) {
1443 code = &cp[IPOPT_OLEN] - (u_char *)ip;
1444 goto bad;
1445 }
1446 if ((off = cp[IPOPT_OFFSET]) < 5) {
1447 code = &cp[IPOPT_OLEN] - (u_char *)ip;
1448 goto bad;
1449 }
1450 if (off > optlen - (int)sizeof(int32_t)) {
1451 cp[IPOPT_OFFSET + 1] += (1 << 4);
1452 if ((cp[IPOPT_OFFSET + 1] & 0xf0) == 0) {
1453 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1454 goto bad;
1455 }
1456 break;
1457 }
1458 off--; /* 0 origin */
1459 sin = (struct in_addr *)(cp + off);
1460 switch (cp[IPOPT_OFFSET + 1] & 0x0f) {
1461
1462 case IPOPT_TS_TSONLY:
1463 break;
1464
1465 case IPOPT_TS_TSANDADDR:
1466 if (off + sizeof(n_time) +
1467 sizeof(struct in_addr) > optlen) {
1468 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1469 goto bad;
1470 }
1471 ipaddr.sin_addr = dst;
1472 ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
1473 m->m_pkthdr.rcvif);
1474 if (ia == 0)
1475 continue;
1476 (void)memcpy(sin, &IA_SIN(ia)->sin_addr,
1477 sizeof(struct in_addr));
1478 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1479 off += sizeof(struct in_addr);
1480 break;
1481
1482 case IPOPT_TS_PRESPEC:
1483 if (off + sizeof(n_time) +
1484 sizeof(struct in_addr) > optlen) {
1485 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1486 goto bad;
1487 }
1488 (void)memcpy(&ipaddr.sin_addr, sin,
1489 sizeof(struct in_addr));
1490 if (ifa_ifwithaddr((SA)&ipaddr) == 0)
1491 continue;
1492 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1493 off += sizeof(struct in_addr);
1494 break;
1495
1496 default:
1497 code = &cp[IPOPT_OFFSET + 1] - (u_char *)ip;
1498 goto bad;
1499 }
1500 ntime = iptime();
1501 (void)memcpy(cp + off, &ntime, sizeof(n_time));
1502 cp[IPOPT_OFFSET] += sizeof(n_time);
1503 }
1504 }
1505 if (forward && ipforwarding) {
1506 ip_forward(m, 1, next_hop);
1507 return (1);
1508 }
1509 return (0);
1510bad:
1511 icmp_error(m, type, code, 0, 0);
1512 ipstat.ips_badoptions++;
1513 return (1);
1514}
1515
1516/*
1517 * Given address of next destination (final or next hop),
1518 * return internet address info of interface to be used to get there.
1519 */
1520struct in_ifaddr *
1521ip_rtaddr(dst, rt)
1522 struct in_addr dst;
1523 struct route *rt;
1524{
1525 register struct sockaddr_in *sin;
1526
1527 sin = (struct sockaddr_in *)&rt->ro_dst;
1528
1529 if (rt->ro_rt == 0 ||
1530 !(rt->ro_rt->rt_flags & RTF_UP) ||
1531 dst.s_addr != sin->sin_addr.s_addr) {
1532 if (rt->ro_rt) {
1533 RTFREE(rt->ro_rt);
1534 rt->ro_rt = 0;
1535 }
1536 sin->sin_family = AF_INET;
1537 sin->sin_len = sizeof(*sin);
1538 sin->sin_addr = dst;
1539
1540 rtalloc_ign(rt, RTF_PRCLONING);
1541 }
1542 if (rt->ro_rt == 0)
1543 return ((struct in_ifaddr *)0);
1544 return (ifatoia(rt->ro_rt->rt_ifa));
1545}
1546
1547/*
1548 * Save incoming source route for use in replies,
1549 * to be picked up later by ip_srcroute if the receiver is interested.
1550 */
1551static void
1552save_rte(option, dst)
1553 u_char *option;
1554 struct in_addr dst;
1555{
1556 unsigned olen;
1557
1558 olen = option[IPOPT_OLEN];
1559#ifdef DIAGNOSTIC
1560 if (ipprintfs)
1561 printf("save_rte: olen %d\n", olen);
1562#endif
1563 if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
1564 return;
1565 bcopy(option, ip_srcrt.srcopt, olen);
1566 ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
1567 ip_srcrt.dst = dst;
1568}
1569
1570/*
1571 * Retrieve incoming source route for use in replies,
1572 * in the same form used by setsockopt.
1573 * The first hop is placed before the options, will be removed later.
1574 */
1575struct mbuf *
1576ip_srcroute()
1577{
1578 register struct in_addr *p, *q;
1579 register struct mbuf *m;
1580
1581 if (ip_nhops == 0)
1582 return ((struct mbuf *)0);
1583 m = m_get(M_DONTWAIT, MT_HEADER);
1584 if (m == 0)
1585 return ((struct mbuf *)0);
1586
1587#define OPTSIZ (sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
1588
1589 /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
1590 m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
1591 OPTSIZ;
1592#ifdef DIAGNOSTIC
1593 if (ipprintfs)
1594 printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
1595#endif
1596
1597 /*
1598 * First save first hop for return route
1599 */
1600 p = &ip_srcrt.route[ip_nhops - 1];
1601 *(mtod(m, struct in_addr *)) = *p--;
1602#ifdef DIAGNOSTIC
1603 if (ipprintfs)
1604 printf(" hops %lx", (u_long)ntohl(mtod(m, struct in_addr *)->s_addr));
1605#endif
1606
1607 /*
1608 * Copy option fields and padding (nop) to mbuf.
1609 */
1610 ip_srcrt.nop = IPOPT_NOP;
1611 ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
1612 (void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
1613 &ip_srcrt.nop, OPTSIZ);
1614 q = (struct in_addr *)(mtod(m, caddr_t) +
1615 sizeof(struct in_addr) + OPTSIZ);
1616#undef OPTSIZ
1617 /*
1618 * Record return path as an IP source route,
1619 * reversing the path (pointers are now aligned).
1620 */
1621 while (p >= ip_srcrt.route) {
1622#ifdef DIAGNOSTIC
1623 if (ipprintfs)
1624 printf(" %lx", (u_long)ntohl(q->s_addr));
1625#endif
1626 *q++ = *p--;
1627 }
1628 /*
1629 * Last hop goes to final destination.
1630 */
1631 *q = ip_srcrt.dst;
1632#ifdef DIAGNOSTIC
1633 if (ipprintfs)
1634 printf(" %lx\n", (u_long)ntohl(q->s_addr));
1635#endif
1636 return (m);
1637}
1638
1639/*
1640 * Strip out IP options, at higher
1641 * level protocol in the kernel.
1642 * Second argument is buffer to which options
1643 * will be moved, and return value is their length.
1644 * XXX should be deleted; last arg currently ignored.
1645 */
1646void
1647ip_stripoptions(m, mopt)
1648 register struct mbuf *m;
1649 struct mbuf *mopt;
1650{
1651 register int i;
1652 struct ip *ip = mtod(m, struct ip *);
1653 register caddr_t opts;
1654 int olen;
1655
1658 olen = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
1656 olen = (ip->ip_hl << 2) - sizeof (struct ip);
1659 opts = (caddr_t)(ip + 1);
1660 i = m->m_len - (sizeof (struct ip) + olen);
1661 bcopy(opts + olen, opts, (unsigned)i);
1662 m->m_len -= olen;
1663 if (m->m_flags & M_PKTHDR)
1664 m->m_pkthdr.len -= olen;
1657 opts = (caddr_t)(ip + 1);
1658 i = m->m_len - (sizeof (struct ip) + olen);
1659 bcopy(opts + olen, opts, (unsigned)i);
1660 m->m_len -= olen;
1661 if (m->m_flags & M_PKTHDR)
1662 m->m_pkthdr.len -= olen;
1665 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, sizeof(struct ip) >> 2);
1663 ip->ip_v = IPVERSION;
1664 ip->ip_hl = sizeof(struct ip) >> 2;
1666}
1667
/*
 * Table of errno values with PRC_NCMDS slots.
 * NOTE(review): presumably indexed by PRC_* command number -- confirm
 * against the PRC_* definitions.  Slots beyond the last initializer
 * are zero.
 */
u_char inetctlerrmap[PRC_NCMDS] = {
	/*  0 -  3 */	0,		0,		0,		0,
	/*  4 -  7 */	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
	/*  8 - 11 */	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
	/* 12 - 15 */	EMSGSIZE,	EHOSTUNREACH,	0,		0,
	/* 16 - 19 */	0,		0,		0,		0,
	/* 20 - 21 */	ENOPROTOOPT,	ECONNREFUSED
};
1676
1677/*
1678 * Forward a packet. If some error occurs return the sender
1679 * an icmp packet. Note we can't always generate a meaningful
1680 * icmp message because icmp doesn't have a large enough repertoire
1681 * of codes and types.
1682 *
1683 * If not forwarding, just drop the packet. This could be confusing
1684 * if ipforwarding was zero but some routing protocol was advancing
1685 * us as a gateway to somewhere. However, we must let the routing
1686 * protocol deal with that.
1687 *
1688 * The srcrt parameter indicates whether the packet is being forwarded
1689 * via a source route.
1690 */
1691static void
1692ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
1693{
1694 struct ip *ip = mtod(m, struct ip *);
1695 struct rtentry *rt;
1696 int error, type = 0, code = 0;
1697 struct mbuf *mcopy;
1698 n_long dest;
1699 struct in_addr pkt_dst;
1700 struct ifnet *destifp;
1701#if defined(IPSEC) || defined(FAST_IPSEC)
1702 struct ifnet dummyifp;
1703#endif
1704
1705 dest = 0;
1706 /*
1707 * Cache the destination address of the packet; this may be
1708 * changed by use of 'ipfw fwd'.
1709 */
1710 pkt_dst = next_hop ? next_hop->sin_addr : ip->ip_dst;
1711
1712#ifdef DIAGNOSTIC
1713 if (ipprintfs)
1714 printf("forward: src %lx dst %lx ttl %x\n",
1715 (u_long)ip->ip_src.s_addr, (u_long)pkt_dst.s_addr,
1716 ip->ip_ttl);
1717#endif
1718
1719
1720 if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(pkt_dst) == 0) {
1721 ipstat.ips_cantforward++;
1722 m_freem(m);
1723 return;
1724 }
1725#ifdef IPSTEALTH
1726 if (!ipstealth) {
1727#endif
1728 if (ip->ip_ttl <= IPTTLDEC) {
1729 icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
1730 dest, 0);
1731 return;
1732 }
1733#ifdef IPSTEALTH
1734 }
1735#endif
1736
1737 if (ip_rtaddr(pkt_dst, &ipforward_rt) == 0) {
1738 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
1739 return;
1740 } else
1741 rt = ipforward_rt.ro_rt;
1742
1743 /*
1744 * Save the IP header and at most 8 bytes of the payload,
1745 * in case we need to generate an ICMP message to the src.
1746 *
1747 * XXX this can be optimized a lot by saving the data in a local
1748 * buffer on the stack (72 bytes at most), and only allocating the
1749 * mbuf if really necessary. The vast majority of the packets
1750 * are forwarded without having to send an ICMP back (either
1751 * because unnecessary, or because rate limited), so we are
1752 * really wasting a lot of work here.
1753 *
1754 * We don't use m_copy() because it might return a reference
1755 * to a shared cluster. Both this function and ip_output()
1756 * assume exclusive access to the IP header in `m', so any
1757 * data in a cluster may change before we reach icmp_error().
1758 */
1759 MGET(mcopy, M_DONTWAIT, m->m_type);
1760 if (mcopy != NULL) {
1761 M_COPY_PKTHDR(mcopy, m);
1665}
1666
/*
 * Table of PRC_NCMDS error values; each entry is either 0 or an errno
 * constant (EMSGSIZE, EHOSTDOWN, EHOSTUNREACH, ECONNREFUSED, ENOPROTOOPT).
 * NOTE(review): presumably indexed by PRC_* control command code to pick
 * the errno reported to a socket; the lookup itself is not in this chunk,
 * so confirm against the callers.
 */
1667u_char inetctlerrmap[PRC_NCMDS] = {
1668 0, 0, 0, 0,
1669 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH,
1670 EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED,
1671 EMSGSIZE, EHOSTUNREACH, 0, 0,
1672 0, 0, 0, 0,
1673 ENOPROTOOPT, ECONNREFUSED
1674};
1675
1676/*
1677 * Forward a packet. If some error occurs return the sender
1678 * an icmp packet. Note we can't always generate a meaningful
1679 * icmp message because icmp doesn't have a large enough repertoire
1680 * of codes and types.
1681 *
1682 * If not forwarding, just drop the packet. This could be confusing
1683 * if ipforwarding was zero but some routing protocol was advancing
1684 * us as a gateway to somewhere. However, we must let the routing
1685 * protocol deal with that.
1686 *
1687 * The srcrt parameter indicates whether the packet is being forwarded
1688 * via a source route.
1689 */
1690static void
1691ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
1692{
1693 struct ip *ip = mtod(m, struct ip *);
1694 struct rtentry *rt;
1695 int error, type = 0, code = 0;
1696 struct mbuf *mcopy;
1697 n_long dest;
1698 struct in_addr pkt_dst;
1699 struct ifnet *destifp;
1700#if defined(IPSEC) || defined(FAST_IPSEC)
1701 struct ifnet dummyifp;
1702#endif
1703
1704 dest = 0;
1705 /*
1706 * Cache the destination address of the packet; this may be
1707 * changed by use of 'ipfw fwd'.
1708 */
1709 pkt_dst = next_hop ? next_hop->sin_addr : ip->ip_dst;
1710
1711#ifdef DIAGNOSTIC
1712 if (ipprintfs)
1713 printf("forward: src %lx dst %lx ttl %x\n",
1714 (u_long)ip->ip_src.s_addr, (u_long)pkt_dst.s_addr,
1715 ip->ip_ttl);
1716#endif
1717
1718
1719 if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(pkt_dst) == 0) {
1720 ipstat.ips_cantforward++;
1721 m_freem(m);
1722 return;
1723 }
1724#ifdef IPSTEALTH
1725 if (!ipstealth) {
1726#endif
1727 if (ip->ip_ttl <= IPTTLDEC) {
1728 icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
1729 dest, 0);
1730 return;
1731 }
1732#ifdef IPSTEALTH
1733 }
1734#endif
1735
1736 if (ip_rtaddr(pkt_dst, &ipforward_rt) == 0) {
1737 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
1738 return;
1739 } else
1740 rt = ipforward_rt.ro_rt;
1741
1742 /*
1743 * Save the IP header and at most 8 bytes of the payload,
1744 * in case we need to generate an ICMP message to the src.
1745 *
1746 * XXX this can be optimized a lot by saving the data in a local
1747 * buffer on the stack (72 bytes at most), and only allocating the
1748 * mbuf if really necessary. The vast majority of the packets
1749 * are forwarded without having to send an ICMP back (either
1750 * because unnecessary, or because rate limited), so we are
1751 * really wasting a lot of work here.
1752 *
1753 * We don't use m_copy() because it might return a reference
1754 * to a shared cluster. Both this function and ip_output()
1755 * assume exclusive access to the IP header in `m', so any
1756 * data in a cluster may change before we reach icmp_error().
1757 */
1758 MGET(mcopy, M_DONTWAIT, m->m_type);
1759 if (mcopy != NULL) {
1760 M_COPY_PKTHDR(mcopy, m);
1762 mcopy->m_len = imin((IP_VHL_HL(ip->ip_vhl) << 2) + 8,
1761 mcopy->m_len = imin((ip->ip_hl << 2) + 8,
1763 (int)ip->ip_len);
1764 m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
1765#ifdef MAC
1766 /*
1767 * XXXMAC: This will eventually become an explicit
1768 * labeling point.
1769 */
1770 mac_create_mbuf_from_mbuf(m, mcopy);
1771#endif
1772 }
1773
1774#ifdef IPSTEALTH
1775 if (!ipstealth) {
1776#endif
1777 ip->ip_ttl -= IPTTLDEC;
1778#ifdef IPSTEALTH
1779 }
1780#endif
1781
1782 /*
1783 * If forwarding packet using same interface that it came in on,
1784 * perhaps should send a redirect to sender to shortcut a hop.
1785 * Only send redirect if source is sending directly to us,
1786 * and if packet was not source routed (or has any options).
1787 * Also, don't send redirect if forwarding using a default route
1788 * or a route modified by a redirect.
1789 */
1790 if (rt->rt_ifp == m->m_pkthdr.rcvif &&
1791 (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
1792 satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
1793 ipsendredirects && !srcrt && !next_hop) {
1794#define RTA(rt) ((struct in_ifaddr *)(rt->rt_ifa))
1795 u_long src = ntohl(ip->ip_src.s_addr);
1796
1797 if (RTA(rt) &&
1798 (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
1799 if (rt->rt_flags & RTF_GATEWAY)
1800 dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
1801 else
1802 dest = pkt_dst.s_addr;
1803 /* Router requirements says to only send host redirects */
1804 type = ICMP_REDIRECT;
1805 code = ICMP_REDIRECT_HOST;
1806#ifdef DIAGNOSTIC
1807 if (ipprintfs)
1808 printf("redirect (%d) to %lx\n", code, (u_long)dest);
1809#endif
1810 }
1811 }
1812
1813 {
1814 struct m_hdr tag;
1815
1816 if (next_hop) {
1817 /* Pass IPFORWARD info if available */
1818
1819 tag.mh_type = MT_TAG;
1820 tag.mh_flags = PACKET_TAG_IPFORWARD;
1821 tag.mh_data = (caddr_t)next_hop;
1822 tag.mh_next = m;
1823 m = (struct mbuf *)&tag;
1824 }
1825 error = ip_output(m, (struct mbuf *)0, &ipforward_rt,
1826 IP_FORWARDING, 0, NULL);
1827 }
1828 if (error)
1829 ipstat.ips_cantforward++;
1830 else {
1831 ipstat.ips_forward++;
1832 if (type)
1833 ipstat.ips_redirectsent++;
1834 else {
1835 if (mcopy) {
1836 ipflow_create(&ipforward_rt, mcopy);
1837 m_freem(mcopy);
1838 }
1839 return;
1840 }
1841 }
1842 if (mcopy == NULL)
1843 return;
1844 destifp = NULL;
1845
1846 switch (error) {
1847
1848 case 0: /* forwarded, but need redirect */
1849 /* type, code set above */
1850 break;
1851
1852 case ENETUNREACH: /* shouldn't happen, checked above */
1853 case EHOSTUNREACH:
1854 case ENETDOWN:
1855 case EHOSTDOWN:
1856 default:
1857 type = ICMP_UNREACH;
1858 code = ICMP_UNREACH_HOST;
1859 break;
1860
1861 case EMSGSIZE:
1862 type = ICMP_UNREACH;
1863 code = ICMP_UNREACH_NEEDFRAG;
1864#ifdef IPSEC
1865 /*
1866 * If the packet is routed over IPsec tunnel, tell the
1867 * originator the tunnel MTU.
1868 * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
1869 * XXX quickhack!!!
1870 */
1871 if (ipforward_rt.ro_rt) {
1872 struct secpolicy *sp = NULL;
1873 int ipsecerror;
1874 int ipsechdr;
1875 struct route *ro;
1876
1877 sp = ipsec4_getpolicybyaddr(mcopy,
1878 IPSEC_DIR_OUTBOUND,
1879 IP_FORWARDING,
1880 &ipsecerror);
1881
1882 if (sp == NULL)
1883 destifp = ipforward_rt.ro_rt->rt_ifp;
1884 else {
1885 /* count IPsec header size */
1886 ipsechdr = ipsec4_hdrsiz(mcopy,
1887 IPSEC_DIR_OUTBOUND,
1888 NULL);
1889
1890 /*
1891 * find the correct route for outer IPv4
1892 * header, compute tunnel MTU.
1893 *
1894 * XXX BUG ALERT
1895 * The "dummyifp" code relies upon the fact
1896 * that icmp_error() touches only ifp->if_mtu.
1897 */
1898 /*XXX*/
1899 destifp = NULL;
1900 if (sp->req != NULL
1901 && sp->req->sav != NULL
1902 && sp->req->sav->sah != NULL) {
1903 ro = &sp->req->sav->sah->sa_route;
1904 if (ro->ro_rt && ro->ro_rt->rt_ifp) {
1905 dummyifp.if_mtu =
1906 ro->ro_rt->rt_ifp->if_mtu;
1907 dummyifp.if_mtu -= ipsechdr;
1908 destifp = &dummyifp;
1909 }
1910 }
1911
1912 key_freesp(sp);
1913 }
1914 }
1915#elif FAST_IPSEC
1916 /*
1917 * If the packet is routed over IPsec tunnel, tell the
1918 * originator the tunnel MTU.
1919 * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
1920 * XXX quickhack!!!
1921 */
1922 if (ipforward_rt.ro_rt) {
1923 struct secpolicy *sp = NULL;
1924 int ipsecerror;
1925 int ipsechdr;
1926 struct route *ro;
1927
1928 sp = ipsec_getpolicybyaddr(mcopy,
1929 IPSEC_DIR_OUTBOUND,
1930 IP_FORWARDING,
1931 &ipsecerror);
1932
1933 if (sp == NULL)
1934 destifp = ipforward_rt.ro_rt->rt_ifp;
1935 else {
1936 /* count IPsec header size */
1937 ipsechdr = ipsec4_hdrsiz(mcopy,
1938 IPSEC_DIR_OUTBOUND,
1939 NULL);
1940
1941 /*
1942 * find the correct route for outer IPv4
1943 * header, compute tunnel MTU.
1944 *
1945 * XXX BUG ALERT
1946 * The "dummyifp" code relies upon the fact
1947 * that icmp_error() touches only ifp->if_mtu.
1948 */
1949 /*XXX*/
1950 destifp = NULL;
1951 if (sp->req != NULL
1952 && sp->req->sav != NULL
1953 && sp->req->sav->sah != NULL) {
1954 ro = &sp->req->sav->sah->sa_route;
1955 if (ro->ro_rt && ro->ro_rt->rt_ifp) {
1956 dummyifp.if_mtu =
1957 ro->ro_rt->rt_ifp->if_mtu;
1958 dummyifp.if_mtu -= ipsechdr;
1959 destifp = &dummyifp;
1960 }
1961 }
1962
1963 KEY_FREESP(&sp);
1964 }
1965 }
1966#else /* !IPSEC && !FAST_IPSEC */
1967 if (ipforward_rt.ro_rt)
1968 destifp = ipforward_rt.ro_rt->rt_ifp;
1969#endif /*IPSEC*/
1970 ipstat.ips_cantfrag++;
1971 break;
1972
1973 case ENOBUFS:
1974 type = ICMP_SOURCEQUENCH;
1975 code = 0;
1976 break;
1977
1978 case EACCES: /* ipfw denied packet */
1979 m_freem(mcopy);
1980 return;
1981 }
1982 icmp_error(mcopy, type, code, dest, destifp);
1983}
1984
/*
 * Build the control-message mbufs requested for a received packet and
 * link them into the chain that starts at *mp.
 *
 *	inp	PCB whose socket options (so_options) and inp_flags select
 *		which control messages are generated.
 *	mp	Slot that receives the first control mbuf.  After each
 *		successful sbcreatecontrol() the local pointer is advanced
 *		to that mbuf's m_next, so later messages are appended.
 *	ip	IP header of the packet; ip_dst is copied for IP_RECVDSTADDR.
 *	m	The packet; m_pkthdr.rcvif is consulted for IP_RECVIF.
 *
 * If sbcreatecontrol() returns NULL the slot is left NULL and is not
 * advanced, so the next message (if any) is stored in the same slot.
 */
1985void
1986ip_savecontrol(inp, mp, ip, m)
1987 register struct inpcb *inp;
1988 register struct mbuf **mp;
1989 register struct ip *ip;
1990 register struct mbuf *m;
1991{
 /* SO_TIMESTAMP: attach the current time obtained from microtime(). */
1992 if (inp->inp_socket->so_options & SO_TIMESTAMP) {
1993 struct timeval tv;
1994
1995 microtime(&tv);
1996 *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
1997 SCM_TIMESTAMP, SOL_SOCKET);
1998 if (*mp)
1999 mp = &(*mp)->m_next;
2000 }
 /* INP_RECVDSTADDR: attach the packet's destination address. */
2001 if (inp->inp_flags & INP_RECVDSTADDR) {
2002 *mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
2003 sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
2004 if (*mp)
2005 mp = &(*mp)->m_next;
2006 }
2007#ifdef notyet
2008 /* XXX
2009 * Moving these out of udp_input() made them even more broken
2010 * than they already were.
2011 */
2012 /* options were tossed already */
2013 if (inp->inp_flags & INP_RECVOPTS) {
2014 *mp = sbcreatecontrol((caddr_t) opts_deleted_above,
2015 sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
2016 if (*mp)
2017 mp = &(*mp)->m_next;
2018 }
2019 /* ip_srcroute doesn't do what we want here, need to fix */
2020 if (inp->inp_flags & INP_RECVRETOPTS) {
2021 *mp = sbcreatecontrol((caddr_t) ip_srcroute(),
2022 sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
2023 if (*mp)
2024 mp = &(*mp)->m_next;
2025 }
2026#endif
 /*
  * INP_RECVIF: attach a sockaddr_dl describing the receive interface,
  * or a minimal dummy one when the interface cannot be described.
  */
2027 if (inp->inp_flags & INP_RECVIF) {
2028 struct ifnet *ifp;
 /* Local staging buffer: a sockaddr_dl followed by 32 spare bytes. */
2029 struct sdlbuf {
2030 struct sockaddr_dl sdl;
2031 u_char pad[32];
2032 } sdlbuf;
2033 struct sockaddr_dl *sdp;
2034 struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
2035
 /*
  * Use the real address only when the packet has a receive
  * interface whose index is non-zero and no larger than the
  * global if_index.
  */
2036 if (((ifp = m->m_pkthdr.rcvif))
2037 && ( ifp->if_index && (ifp->if_index <= if_index))) {
2038 sdp = (struct sockaddr_dl *)
2039 (ifaddr_byindex(ifp->if_index)->ifa_addr);
2040 /*
2041 * Change our mind and don't try copy.
2042 */
 /* Not AF_LINK, or too long for sdlbuf: fall back to the dummy. */
2043 if ((sdp->sdl_family != AF_LINK)
2044 || (sdp->sdl_len > sizeof(sdlbuf))) {
2045 goto makedummy;
2046 }
2047 bcopy(sdp, sdl2, sdp->sdl_len);
2048 } else {
 /*
  * Dummy address: header only (length up to sdl_data[0]),
  * index 0 and zero-length name/address/selector.  Also
  * entered by the goto above.
  */
2049makedummy:
2050 sdl2->sdl_len
2051 = offsetof(struct sockaddr_dl, sdl_data[0]);
2052 sdl2->sdl_family = AF_LINK;
2053 sdl2->sdl_index = 0;
2054 sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
2055 }
2056 *mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len,
2057 IP_RECVIF, IPPROTO_IP);
2058 if (*mp)
2059 mp = &(*mp)->m_next;
2060 }
2061}
2062
2063/*
2064 * XXX these routines are called from the upper part of the kernel.
2065 * They need to be locked when we remove Giant.
2066 *
2067 * They could also be moved to ip_mroute.c, since all the RSVP
2068 * handling is done there already.
2069 */
/* Non-zero while this file holds one increment of rsvp_on. */
2070static int ip_rsvp_on;
/* Socket registered through ip_rsvp_init(), or NULL when none is registered. */
2071struct socket *ip_rsvpd;
/*
 * Register `so' as the RSVP socket.
 *
 * Returns EOPNOTSUPP unless `so' is a SOCK_RAW socket whose protocol is
 * IPPROTO_RSVP, EADDRINUSE if ip_rsvpd is already set, and 0 on success.
 * On success ip_rsvpd points at `so' and rsvp_on has been incremented,
 * at most once, guarded by ip_rsvp_on.
 */
2072int
2073ip_rsvp_init(struct socket *so)
2074{
2075 if (so->so_type != SOCK_RAW ||
2076 so->so_proto->pr_protocol != IPPROTO_RSVP)
2077 return EOPNOTSUPP;
2078
 /* Only one RSVP socket may be registered at a time. */
2079 if (ip_rsvpd != NULL)
2080 return EADDRINUSE;
2081
2082 ip_rsvpd = so;
2083 /*
2084 * This may seem silly, but we need to be sure we don't over-increment
2085 * the RSVP counter, in case something slips up.
2086 */
2087 if (!ip_rsvp_on) {
2088 ip_rsvp_on = 1;
2089 rsvp_on++;
2090 }
2091
2092 return 0;
2093}
2094
/*
 * Unregister the RSVP socket: clear ip_rsvpd and, if ip_rsvp_on is set,
 * clear it and decrement rsvp_on.  Always returns 0.
 */
2095int
2096ip_rsvp_done(void)
2097{
2098 ip_rsvpd = NULL;
2099 /*
2100 * This may seem silly, but we need to be sure we don't over-decrement
2101 * the RSVP counter, in case something slips up.
2102 */
2103 if (ip_rsvp_on) {
2104 ip_rsvp_on = 0;
2105 rsvp_on--;
2106 }
2107 return 0;
2108}
1762 (int)ip->ip_len);
1763 m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
1764#ifdef MAC
1765 /*
1766 * XXXMAC: This will eventually become an explicit
1767 * labeling point.
1768 */
1769 mac_create_mbuf_from_mbuf(m, mcopy);
1770#endif
1771 }
1772
1773#ifdef IPSTEALTH
1774 if (!ipstealth) {
1775#endif
1776 ip->ip_ttl -= IPTTLDEC;
1777#ifdef IPSTEALTH
1778 }
1779#endif
1780
1781 /*
1782 * If forwarding packet using same interface that it came in on,
1783 * perhaps should send a redirect to sender to shortcut a hop.
1784 * Only send redirect if source is sending directly to us,
1785 * and if packet was not source routed (or has any options).
1786 * Also, don't send redirect if forwarding using a default route
1787 * or a route modified by a redirect.
1788 */
1789 if (rt->rt_ifp == m->m_pkthdr.rcvif &&
1790 (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
1791 satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
1792 ipsendredirects && !srcrt && !next_hop) {
1793#define RTA(rt) ((struct in_ifaddr *)(rt->rt_ifa))
1794 u_long src = ntohl(ip->ip_src.s_addr);
1795
1796 if (RTA(rt) &&
1797 (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
1798 if (rt->rt_flags & RTF_GATEWAY)
1799 dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
1800 else
1801 dest = pkt_dst.s_addr;
1802 /* Router requirements says to only send host redirects */
1803 type = ICMP_REDIRECT;
1804 code = ICMP_REDIRECT_HOST;
1805#ifdef DIAGNOSTIC
1806 if (ipprintfs)
1807 printf("redirect (%d) to %lx\n", code, (u_long)dest);
1808#endif
1809 }
1810 }
1811
1812 {
1813 struct m_hdr tag;
1814
1815 if (next_hop) {
1816 /* Pass IPFORWARD info if available */
1817
1818 tag.mh_type = MT_TAG;
1819 tag.mh_flags = PACKET_TAG_IPFORWARD;
1820 tag.mh_data = (caddr_t)next_hop;
1821 tag.mh_next = m;
1822 m = (struct mbuf *)&tag;
1823 }
1824 error = ip_output(m, (struct mbuf *)0, &ipforward_rt,
1825 IP_FORWARDING, 0, NULL);
1826 }
1827 if (error)
1828 ipstat.ips_cantforward++;
1829 else {
1830 ipstat.ips_forward++;
1831 if (type)
1832 ipstat.ips_redirectsent++;
1833 else {
1834 if (mcopy) {
1835 ipflow_create(&ipforward_rt, mcopy);
1836 m_freem(mcopy);
1837 }
1838 return;
1839 }
1840 }
1841 if (mcopy == NULL)
1842 return;
1843 destifp = NULL;
1844
1845 switch (error) {
1846
1847 case 0: /* forwarded, but need redirect */
1848 /* type, code set above */
1849 break;
1850
1851 case ENETUNREACH: /* shouldn't happen, checked above */
1852 case EHOSTUNREACH:
1853 case ENETDOWN:
1854 case EHOSTDOWN:
1855 default:
1856 type = ICMP_UNREACH;
1857 code = ICMP_UNREACH_HOST;
1858 break;
1859
1860 case EMSGSIZE:
1861 type = ICMP_UNREACH;
1862 code = ICMP_UNREACH_NEEDFRAG;
1863#ifdef IPSEC
1864 /*
1865 * If the packet is routed over IPsec tunnel, tell the
1866 * originator the tunnel MTU.
1867 * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
1868 * XXX quickhack!!!
1869 */
1870 if (ipforward_rt.ro_rt) {
1871 struct secpolicy *sp = NULL;
1872 int ipsecerror;
1873 int ipsechdr;
1874 struct route *ro;
1875
1876 sp = ipsec4_getpolicybyaddr(mcopy,
1877 IPSEC_DIR_OUTBOUND,
1878 IP_FORWARDING,
1879 &ipsecerror);
1880
1881 if (sp == NULL)
1882 destifp = ipforward_rt.ro_rt->rt_ifp;
1883 else {
1884 /* count IPsec header size */
1885 ipsechdr = ipsec4_hdrsiz(mcopy,
1886 IPSEC_DIR_OUTBOUND,
1887 NULL);
1888
1889 /*
1890 * find the correct route for outer IPv4
1891 * header, compute tunnel MTU.
1892 *
1893 * XXX BUG ALERT
1894 * The "dummyifp" code relies upon the fact
1895 * that icmp_error() touches only ifp->if_mtu.
1896 */
1897 /*XXX*/
1898 destifp = NULL;
1899 if (sp->req != NULL
1900 && sp->req->sav != NULL
1901 && sp->req->sav->sah != NULL) {
1902 ro = &sp->req->sav->sah->sa_route;
1903 if (ro->ro_rt && ro->ro_rt->rt_ifp) {
1904 dummyifp.if_mtu =
1905 ro->ro_rt->rt_ifp->if_mtu;
1906 dummyifp.if_mtu -= ipsechdr;
1907 destifp = &dummyifp;
1908 }
1909 }
1910
1911 key_freesp(sp);
1912 }
1913 }
1914#elif FAST_IPSEC
1915 /*
1916 * If the packet is routed over IPsec tunnel, tell the
1917 * originator the tunnel MTU.
1918 * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
1919 * XXX quickhack!!!
1920 */
1921 if (ipforward_rt.ro_rt) {
1922 struct secpolicy *sp = NULL;
1923 int ipsecerror;
1924 int ipsechdr;
1925 struct route *ro;
1926
1927 sp = ipsec_getpolicybyaddr(mcopy,
1928 IPSEC_DIR_OUTBOUND,
1929 IP_FORWARDING,
1930 &ipsecerror);
1931
1932 if (sp == NULL)
1933 destifp = ipforward_rt.ro_rt->rt_ifp;
1934 else {
1935 /* count IPsec header size */
1936 ipsechdr = ipsec4_hdrsiz(mcopy,
1937 IPSEC_DIR_OUTBOUND,
1938 NULL);
1939
1940 /*
1941 * find the correct route for outer IPv4
1942 * header, compute tunnel MTU.
1943 *
1944 * XXX BUG ALERT
1945 * The "dummyifp" code relies upon the fact
1946 * that icmp_error() touches only ifp->if_mtu.
1947 */
1948 /*XXX*/
1949 destifp = NULL;
1950 if (sp->req != NULL
1951 && sp->req->sav != NULL
1952 && sp->req->sav->sah != NULL) {
1953 ro = &sp->req->sav->sah->sa_route;
1954 if (ro->ro_rt && ro->ro_rt->rt_ifp) {
1955 dummyifp.if_mtu =
1956 ro->ro_rt->rt_ifp->if_mtu;
1957 dummyifp.if_mtu -= ipsechdr;
1958 destifp = &dummyifp;
1959 }
1960 }
1961
1962 KEY_FREESP(&sp);
1963 }
1964 }
1965#else /* !IPSEC && !FAST_IPSEC */
1966 if (ipforward_rt.ro_rt)
1967 destifp = ipforward_rt.ro_rt->rt_ifp;
1968#endif /*IPSEC*/
1969 ipstat.ips_cantfrag++;
1970 break;
1971
1972 case ENOBUFS:
1973 type = ICMP_SOURCEQUENCH;
1974 code = 0;
1975 break;
1976
1977 case EACCES: /* ipfw denied packet */
1978 m_freem(mcopy);
1979 return;
1980 }
1981 icmp_error(mcopy, type, code, dest, destifp);
1982}
1983
/*
 * Build the control-message mbufs requested for a received packet and
 * link them into the chain that starts at *mp.
 *
 *	inp	PCB whose socket options (so_options) and inp_flags select
 *		which control messages are generated.
 *	mp	Slot that receives the first control mbuf.  After each
 *		successful sbcreatecontrol() the local pointer is advanced
 *		to that mbuf's m_next, so later messages are appended.
 *	ip	IP header of the packet; ip_dst is copied for IP_RECVDSTADDR.
 *	m	The packet; m_pkthdr.rcvif is consulted for IP_RECVIF.
 *
 * If sbcreatecontrol() returns NULL the slot is left NULL and is not
 * advanced, so the next message (if any) is stored in the same slot.
 */
1984void
1985ip_savecontrol(inp, mp, ip, m)
1986 register struct inpcb *inp;
1987 register struct mbuf **mp;
1988 register struct ip *ip;
1989 register struct mbuf *m;
1990{
 /* SO_TIMESTAMP: attach the current time obtained from microtime(). */
1991 if (inp->inp_socket->so_options & SO_TIMESTAMP) {
1992 struct timeval tv;
1993
1994 microtime(&tv);
1995 *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
1996 SCM_TIMESTAMP, SOL_SOCKET);
1997 if (*mp)
1998 mp = &(*mp)->m_next;
1999 }
 /* INP_RECVDSTADDR: attach the packet's destination address. */
2000 if (inp->inp_flags & INP_RECVDSTADDR) {
2001 *mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
2002 sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
2003 if (*mp)
2004 mp = &(*mp)->m_next;
2005 }
2006#ifdef notyet
2007 /* XXX
2008 * Moving these out of udp_input() made them even more broken
2009 * than they already were.
2010 */
2011 /* options were tossed already */
2012 if (inp->inp_flags & INP_RECVOPTS) {
2013 *mp = sbcreatecontrol((caddr_t) opts_deleted_above,
2014 sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
2015 if (*mp)
2016 mp = &(*mp)->m_next;
2017 }
2018 /* ip_srcroute doesn't do what we want here, need to fix */
2019 if (inp->inp_flags & INP_RECVRETOPTS) {
2020 *mp = sbcreatecontrol((caddr_t) ip_srcroute(),
2021 sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
2022 if (*mp)
2023 mp = &(*mp)->m_next;
2024 }
2025#endif
 /*
  * INP_RECVIF: attach a sockaddr_dl describing the receive interface,
  * or a minimal dummy one when the interface cannot be described.
  */
2026 if (inp->inp_flags & INP_RECVIF) {
2027 struct ifnet *ifp;
 /* Local staging buffer: a sockaddr_dl followed by 32 spare bytes. */
2028 struct sdlbuf {
2029 struct sockaddr_dl sdl;
2030 u_char pad[32];
2031 } sdlbuf;
2032 struct sockaddr_dl *sdp;
2033 struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
2034
 /*
  * Use the real address only when the packet has a receive
  * interface whose index is non-zero and no larger than the
  * global if_index.
  */
2035 if (((ifp = m->m_pkthdr.rcvif))
2036 && ( ifp->if_index && (ifp->if_index <= if_index))) {
2037 sdp = (struct sockaddr_dl *)
2038 (ifaddr_byindex(ifp->if_index)->ifa_addr);
2039 /*
2040 * Change our mind and don't try copy.
2041 */
 /* Not AF_LINK, or too long for sdlbuf: fall back to the dummy. */
2042 if ((sdp->sdl_family != AF_LINK)
2043 || (sdp->sdl_len > sizeof(sdlbuf))) {
2044 goto makedummy;
2045 }
2046 bcopy(sdp, sdl2, sdp->sdl_len);
2047 } else {
 /*
  * Dummy address: header only (length up to sdl_data[0]),
  * index 0 and zero-length name/address/selector.  Also
  * entered by the goto above.
  */
2048makedummy:
2049 sdl2->sdl_len
2050 = offsetof(struct sockaddr_dl, sdl_data[0]);
2051 sdl2->sdl_family = AF_LINK;
2052 sdl2->sdl_index = 0;
2053 sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
2054 }
2055 *mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len,
2056 IP_RECVIF, IPPROTO_IP);
2057 if (*mp)
2058 mp = &(*mp)->m_next;
2059 }
2060}
2061
2062/*
2063 * XXX these routines are called from the upper part of the kernel.
2064 * They need to be locked when we remove Giant.
2065 *
2066 * They could also be moved to ip_mroute.c, since all the RSVP
2067 * handling is done there already.
2068 */
/* Non-zero while this file holds one increment of rsvp_on. */
2069static int ip_rsvp_on;
/* Socket registered through ip_rsvp_init(), or NULL when none is registered. */
2070struct socket *ip_rsvpd;
/*
 * Register `so' as the RSVP socket.
 *
 * Returns EOPNOTSUPP unless `so' is a SOCK_RAW socket whose protocol is
 * IPPROTO_RSVP, EADDRINUSE if ip_rsvpd is already set, and 0 on success.
 * On success ip_rsvpd points at `so' and rsvp_on has been incremented,
 * at most once, guarded by ip_rsvp_on.
 */
2071int
2072ip_rsvp_init(struct socket *so)
2073{
2074 if (so->so_type != SOCK_RAW ||
2075 so->so_proto->pr_protocol != IPPROTO_RSVP)
2076 return EOPNOTSUPP;
2077
 /* Only one RSVP socket may be registered at a time. */
2078 if (ip_rsvpd != NULL)
2079 return EADDRINUSE;
2080
2081 ip_rsvpd = so;
2082 /*
2083 * This may seem silly, but we need to be sure we don't over-increment
2084 * the RSVP counter, in case something slips up.
2085 */
2086 if (!ip_rsvp_on) {
2087 ip_rsvp_on = 1;
2088 rsvp_on++;
2089 }
2090
2091 return 0;
2092}
2093
/*
 * Unregister the RSVP socket: clear ip_rsvpd and, if ip_rsvp_on is set,
 * clear it and decrement rsvp_on.  Always returns 0.
 */
2094int
2095ip_rsvp_done(void)
2096{
2097 ip_rsvpd = NULL;
2098 /*
2099 * This may seem silly, but we need to be sure we don't over-decrement
2100 * the RSVP counter, in case something slips up.
2101 */
2102 if (ip_rsvp_on) {
2103 ip_rsvp_on = 0;
2104 rsvp_on--;
2105 }
2106 return 0;
2107}