Deleted Added
full compact
ip6_input.c (185571) ip6_input.c (185895)
1/*-
2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the project nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $KAME: ip6_input.c,v 1.259 2002/01/21 04:58:09 jinmei Exp $
30 */
31
32/*-
33 * Copyright (c) 1982, 1986, 1988, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)ip_input.c 8.2 (Berkeley) 1/4/94
61 */
62
63#include <sys/cdefs.h>
1/*-
2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the project nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $KAME: ip6_input.c,v 1.259 2002/01/21 04:58:09 jinmei Exp $
30 */
31
32/*-
33 * Copyright (c) 1982, 1986, 1988, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)ip_input.c 8.2 (Berkeley) 1/4/94
61 */
62
63#include <sys/cdefs.h>
64__FBSDID("$FreeBSD: head/sys/netinet6/ip6_input.c 185571 2008-12-02 21:37:28Z bz $");
64__FBSDID("$FreeBSD: head/sys/netinet6/ip6_input.c 185895 2008-12-10 23:12:39Z zec $");
65
66#include "opt_inet.h"
67#include "opt_inet6.h"
68#include "opt_ipsec.h"
69
70#include <sys/param.h>
71#include <sys/systm.h>
72#include <sys/malloc.h>
73#include <sys/mbuf.h>
74#include <sys/proc.h>
75#include <sys/domain.h>
76#include <sys/protosw.h>
77#include <sys/socket.h>
78#include <sys/socketvar.h>
79#include <sys/errno.h>
80#include <sys/time.h>
81#include <sys/kernel.h>
82#include <sys/syslog.h>
83#include <sys/vimage.h>
84
85#include <net/if.h>
86#include <net/if_types.h>
87#include <net/if_dl.h>
88#include <net/route.h>
89#include <net/netisr.h>
90#include <net/pfil.h>
91#include <net/vnet.h>
92
93#include <netinet/in.h>
94#include <netinet/in_systm.h>
95#ifdef INET
96#include <netinet/ip.h>
97#include <netinet/ip_icmp.h>
98#include <netinet/vinet.h>
99#endif /* INET */
100#include <netinet/ip6.h>
101#include <netinet6/in6_var.h>
102#include <netinet6/ip6_var.h>
103#include <netinet/in_pcb.h>
104#include <netinet/icmp6.h>
105#include <netinet6/scope6_var.h>
106#include <netinet6/in6_ifattach.h>
107#include <netinet6/nd6.h>
108#include <netinet6/vinet6.h>
109
110#ifdef IPSEC
111#include <netipsec/ipsec.h>
112#include <netinet6/ip6_ipsec.h>
113#include <netipsec/ipsec6.h>
114#endif /* IPSEC */
115
116#include <netinet6/ip6protosw.h>
117
118extern struct domain inet6domain;
119
120u_char ip6_protox[IPPROTO_MAX];
121static struct ifqueue ip6intrq;
122
65
66#include "opt_inet.h"
67#include "opt_inet6.h"
68#include "opt_ipsec.h"
69
70#include <sys/param.h>
71#include <sys/systm.h>
72#include <sys/malloc.h>
73#include <sys/mbuf.h>
74#include <sys/proc.h>
75#include <sys/domain.h>
76#include <sys/protosw.h>
77#include <sys/socket.h>
78#include <sys/socketvar.h>
79#include <sys/errno.h>
80#include <sys/time.h>
81#include <sys/kernel.h>
82#include <sys/syslog.h>
83#include <sys/vimage.h>
84
85#include <net/if.h>
86#include <net/if_types.h>
87#include <net/if_dl.h>
88#include <net/route.h>
89#include <net/netisr.h>
90#include <net/pfil.h>
91#include <net/vnet.h>
92
93#include <netinet/in.h>
94#include <netinet/in_systm.h>
95#ifdef INET
96#include <netinet/ip.h>
97#include <netinet/ip_icmp.h>
98#include <netinet/vinet.h>
99#endif /* INET */
100#include <netinet/ip6.h>
101#include <netinet6/in6_var.h>
102#include <netinet6/ip6_var.h>
103#include <netinet/in_pcb.h>
104#include <netinet/icmp6.h>
105#include <netinet6/scope6_var.h>
106#include <netinet6/in6_ifattach.h>
107#include <netinet6/nd6.h>
108#include <netinet6/vinet6.h>
109
110#ifdef IPSEC
111#include <netipsec/ipsec.h>
112#include <netinet6/ip6_ipsec.h>
113#include <netipsec/ipsec6.h>
114#endif /* IPSEC */
115
116#include <netinet6/ip6protosw.h>
117
118extern struct domain inet6domain;
119
120u_char ip6_protox[IPPROTO_MAX];
121static struct ifqueue ip6intrq;
122
123#ifndef VIMAGE
124#ifndef VIMAGE_GLOBALS
125struct vnet_inet6 vnet_inet6_0;
126#endif
127#endif
128
123#ifdef VIMAGE_GLOBALS
124static int ip6qmaxlen;
125struct in6_ifaddr *in6_ifaddr;
126struct ip6stat ip6stat;
127#endif
128
129extern struct callout in6_tmpaddrtimer_ch;
130
131extern int dad_init;
132extern int pmtu_expire;
133extern int pmtu_probe;
134extern u_long rip6_sendspace;
135extern u_long rip6_recvspace;
136extern int icmp6errppslim;
137extern int icmp6_nodeinfo;
138extern int udp6_sendspace;
139extern int udp6_recvspace;
140
141#ifdef VIMAGE_GLOBALS
142int ip6_forward_srcrt; /* XXX */
143int ip6_sourcecheck; /* XXX */
144int ip6_sourcecheck_interval; /* XXX */
145int ip6_ours_check_algorithm;
146#endif
147
148struct pfil_head inet6_pfil_hook;
149
150static void ip6_init2(void *);
151static struct ip6aux *ip6_setdstifaddr(struct mbuf *, struct in6_ifaddr *);
152static int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *);
153#ifdef PULLDOWN_TEST
154static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int);
155#endif
156
157/*
158 * IP6 initialization: fill in IP6 protocol switch table.
159 * All protocols not implemented in kernel go to raw IP6 protocol handler.
160 */
161void
162ip6_init(void)
163{
164 INIT_VNET_INET6(curvnet);
165 struct ip6protosw *pr;
166 int i;
167
168 V_ip6qmaxlen = IFQ_MAXLEN;
169 V_in6_maxmtu = 0;
170#ifdef IP6_AUTO_LINKLOCAL
171 V_ip6_auto_linklocal = IP6_AUTO_LINKLOCAL;
172#else
173 V_ip6_auto_linklocal = 1; /* enable by default */
174#endif
129#ifdef VIMAGE_GLOBALS
130static int ip6qmaxlen;
131struct in6_ifaddr *in6_ifaddr;
132struct ip6stat ip6stat;
133#endif
134
135extern struct callout in6_tmpaddrtimer_ch;
136
137extern int dad_init;
138extern int pmtu_expire;
139extern int pmtu_probe;
140extern u_long rip6_sendspace;
141extern u_long rip6_recvspace;
142extern int icmp6errppslim;
143extern int icmp6_nodeinfo;
144extern int udp6_sendspace;
145extern int udp6_recvspace;
146
147#ifdef VIMAGE_GLOBALS
148int ip6_forward_srcrt; /* XXX */
149int ip6_sourcecheck; /* XXX */
150int ip6_sourcecheck_interval; /* XXX */
151int ip6_ours_check_algorithm;
152#endif
153
154struct pfil_head inet6_pfil_hook;
155
156static void ip6_init2(void *);
157static struct ip6aux *ip6_setdstifaddr(struct mbuf *, struct in6_ifaddr *);
158static int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *);
159#ifdef PULLDOWN_TEST
160static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int);
161#endif
162
163/*
164 * IP6 initialization: set the default values of the IPv6, raw IP6, ICMPv6
 * and UDP-over-IP6 variables, then fill in IP6 protocol switch table.
165 * All protocols not implemented in kernel go to raw IP6 protocol handler.
 * Afterwards register the inet6 pfil hook head and the NETISR_IPV6 input
 * queue (handler: ip6_input), and call the scope6, address selection
 * policy, nd6 and frag6 initializers.
166 */
167void
168ip6_init(void)
169{
170 INIT_VNET_INET6(curvnet);
171 struct ip6protosw *pr;
172 int i;
173
174 V_ip6qmaxlen = IFQ_MAXLEN;
175 V_in6_maxmtu = 0;
176#ifdef IP6_AUTO_LINKLOCAL
177 V_ip6_auto_linklocal = IP6_AUTO_LINKLOCAL;
178#else
179 V_ip6_auto_linklocal = 1; /* enable by default */
180#endif
 /*
  * NOTE(review): presumably lets the "net.inet6.ip6.auto_linklocal"
  * tunable override the default chosen just above - confirm against
  * TUNABLE_INT_FETCH semantics.
  */
181 TUNABLE_INT_FETCH("net.inet6.ip6.auto_linklocal",
182 &V_ip6_auto_linklocal);
175
176#ifndef IPV6FORWARDING
177#ifdef GATEWAY6
178#define IPV6FORWARDING 1 /* forward IP6 packets not for us */
179#else
180#define IPV6FORWARDING 0 /* don't forward IP6 packets not for us */
181#endif /* GATEWAY6 */
182#endif /* !IPV6FORWARDING */
183
184#ifndef IPV6_SENDREDIRECTS
185#define IPV6_SENDREDIRECTS 1
186#endif
187
188 V_ip6_forwarding = IPV6FORWARDING; /* act as router? */
189 V_ip6_sendredirects = IPV6_SENDREDIRECTS;
190 V_ip6_defhlim = IPV6_DEFHLIM;
191 V_ip6_defmcasthlim = IPV6_DEFAULT_MULTICAST_HOPS;
192 V_ip6_accept_rtadv = 0; /* "IPV6FORWARDING ? 0 : 1" is dangerous */
193 V_ip6_log_interval = 5;
194 V_ip6_hdrnestlimit = 15; /* How many header options will we process? */
195 V_ip6_dad_count = 1; /* DupAddrDetectionTransmits */
196 V_ip6_auto_flowlabel = 1;
197 V_ip6_use_deprecated = 1;/* allow deprecated addr (RFC2462 5.5.4) */
198 V_ip6_rr_prune = 5; /* router renumbering prefix
199 * walk list every 5 sec. */
200 V_ip6_mcast_pmtu = 0; /* enable pMTU discovery for multicast? */
201 V_ip6_v6only = 1;
202 V_ip6_keepfaith = 0;
203 V_ip6_log_time = (time_t)0L;
204#ifdef IPSTEALTH
205 V_ip6stealth = 0;
206#endif
207 V_nd6_onlink_ns_rfc4861 = 0; /* allow 'on-link' nd6 NS (RFC 4861) */
208
209 V_pmtu_expire = 60*10;
210 V_pmtu_probe = 60*2;
211
212 /* raw IP6 parameters */
213 /*
214 * Nominal space allocated to a raw ip socket.
215 */
216#define RIPV6SNDQ 8192
217#define RIPV6RCVQ 8192
218 V_rip6_sendspace = RIPV6SNDQ;
219 V_rip6_recvspace = RIPV6RCVQ;
220
221 /* ICMPV6 parameters */
222 V_icmp6_rediraccept = 1; /* accept and process redirects */
223 V_icmp6_redirtimeout = 10 * 60; /* 10 minutes */
224 V_icmp6errppslim = 100; /* 100pps */
225 /* control how to respond to NI queries */
226 V_icmp6_nodeinfo = (ICMP6_NODEINFO_FQDNOK|ICMP6_NODEINFO_NODEADDROK);
227
228 /* UDP on IP6 parameters */
229 V_udp6_sendspace = 9216; /* really max datagram size */
230 V_udp6_recvspace = 40 * (1024 + sizeof(struct sockaddr_in6));
231 /* 40 1K datagrams */
232 V_dad_init = 0;
233
234#ifdef DIAGNOSTIC
235 if (sizeof(struct protosw) != sizeof(struct ip6protosw))
236 panic("sizeof(protosw) != sizeof(ip6protosw)");
237#endif
 /* Look up the raw IP6 protosw entry; it is the fallback for every protocol. */
238 pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
239 if (pr == 0)
240 panic("ip6_init");
241
242 /* Point every ip6_protox[] entry at the IPPROTO_RAW protosw slot. */
243 for (i = 0; i < IPPROTO_MAX; i++)
244 ip6_protox[i] = pr - inet6sw;
245 /*
246 * Cycle through IP protocols and put them into the appropriate place
247 * in ip6_protox[].
248 */
249 for (pr = (struct ip6protosw *)inet6domain.dom_protosw;
250 pr < (struct ip6protosw *)inet6domain.dom_protoswNPROTOSW; pr++)
251 if (pr->pr_domain->dom_family == PF_INET6 &&
252 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
253 /* Be careful to only index valid IP protocols. */
254 if (pr->pr_protocol < IPPROTO_MAX)
255 ip6_protox[pr->pr_protocol] = pr - inet6sw;
256 }
257
258 /* Initialize packet filter hooks. */
259 inet6_pfil_hook.ph_type = PFIL_TYPE_AF;
260 inet6_pfil_hook.ph_af = AF_INET6;
261 if ((i = pfil_head_register(&inet6_pfil_hook)) != 0)
262 printf("%s: WARNING: unable to register pfil hook, "
263 "error %d\n", __func__, i);
264
 /* Set up the IPv6 input queue and register it, with ip6_input, for NETISR_IPV6. */
265 ip6intrq.ifq_maxlen = V_ip6qmaxlen;
266 mtx_init(&ip6intrq.ifq_mtx, "ip6_inq", NULL, MTX_DEF);
267 netisr_register(NETISR_IPV6, ip6_input, &ip6intrq, 0);
268 scope6_init();
269 addrsel_policy_init();
270 nd6_init();
271 frag6_init();
272 V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR;
273}
274
/*
 * Second-stage IP6 initialization: initialize and arm the nd6 timer callout
 * and the callout that invokes in6_tmpaddrtimer().  The argument is unused.
 */
275static void
276ip6_init2(void *dummy)
277{
278 INIT_VNET_INET6(curvnet);
279
280 /* nd6_timer_init: schedule nd6_timer() with a delay of hz */
281 callout_init(&V_nd6_timer_ch, 0);
282 callout_reset(&V_nd6_timer_ch, hz, nd6_timer, NULL);
283
284 /*
  * timer for regeneration of the randomized ID of temporary addresses;
  * the delay is (preferred lifetime - desync factor - regen advance),
  * scaled by hz (presumably seconds converted to ticks - confirm).
  */
285 callout_init(&V_in6_tmpaddrtimer_ch, 0);
286 callout_reset(&V_in6_tmpaddrtimer_ch,
287 (V_ip6_temp_preferred_lifetime - V_ip6_desync_factor -
288 V_ip6_temp_regen_advance) * hz,
289 in6_tmpaddrtimer, NULL);
290}
291
292/* cheat */
293/* This must be after route_init(), which is now SI_ORDER_THIRD */
294SYSINIT(netinet6init2, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ip6_init2, NULL);
295
296extern struct route_in6 ip6_forward_rt;
297
298void
299ip6_input(struct mbuf *m)
300{
301 INIT_VNET_NET(curvnet);
302 INIT_VNET_INET6(curvnet);
303 struct ip6_hdr *ip6;
304 int off = sizeof(struct ip6_hdr), nest;
305 u_int32_t plen;
306 u_int32_t rtalert = ~0;
307 int nxt, ours = 0;
308 struct ifnet *deliverifp = NULL;
309 struct in6_addr odst;
310 int srcrt = 0;
311
312#ifdef IPSEC
313 /*
314 * should the inner packet be considered authentic?
315 * see comment in ah4_input().
316 * NB: m cannot be NULL when passed to the input routine
317 */
318
319 m->m_flags &= ~M_AUTHIPHDR;
320 m->m_flags &= ~M_AUTHIPDGM;
321
322#endif /* IPSEC */
323
324 /*
325 * make sure we don't have onion peering information into m_tag.
326 */
327 ip6_delaux(m);
328
329 /*
330 * mbuf statistics
331 */
332 if (m->m_flags & M_EXT) {
333 if (m->m_next)
334 V_ip6stat.ip6s_mext2m++;
335 else
336 V_ip6stat.ip6s_mext1++;
337 } else {
338#define M2MMAX (sizeof(V_ip6stat.ip6s_m2m)/sizeof(V_ip6stat.ip6s_m2m[0]))
339 if (m->m_next) {
340 if (m->m_flags & M_LOOP) {
341 V_ip6stat.ip6s_m2m[V_loif[0].if_index]++; /* XXX */
342 } else if (m->m_pkthdr.rcvif->if_index < M2MMAX)
343 V_ip6stat.ip6s_m2m[m->m_pkthdr.rcvif->if_index]++;
344 else
345 V_ip6stat.ip6s_m2m[0]++;
346 } else
347 V_ip6stat.ip6s_m1++;
348#undef M2MMAX
349 }
350
351 /* drop the packet if IPv6 operation is disabled on the IF */
352 if ((ND_IFINFO(m->m_pkthdr.rcvif)->flags & ND6_IFF_IFDISABLED)) {
353 m_freem(m);
354 return;
355 }
356
357 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_receive);
358 V_ip6stat.ip6s_total++;
359
360#ifndef PULLDOWN_TEST
361 /*
362 * L2 bridge code and some other code can return mbuf chain
363 * that does not conform to KAME requirement. too bad.
364 * XXX: fails to join if interface MTU > MCLBYTES. jumbogram?
365 */
366 if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) {
367 struct mbuf *n;
368
369 MGETHDR(n, M_DONTWAIT, MT_HEADER);
370 if (n)
371 M_MOVE_PKTHDR(n, m);
372 if (n && n->m_pkthdr.len > MHLEN) {
373 MCLGET(n, M_DONTWAIT);
374 if ((n->m_flags & M_EXT) == 0) {
375 m_freem(n);
376 n = NULL;
377 }
378 }
379 if (n == NULL) {
380 m_freem(m);
381 return; /* ENOBUFS */
382 }
383
384 m_copydata(m, 0, n->m_pkthdr.len, mtod(n, caddr_t));
385 n->m_len = n->m_pkthdr.len;
386 m_freem(m);
387 m = n;
388 }
389 IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), /* nothing */);
390#endif
391
392 if (m->m_len < sizeof(struct ip6_hdr)) {
393 struct ifnet *inifp;
394 inifp = m->m_pkthdr.rcvif;
395 if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
396 V_ip6stat.ip6s_toosmall++;
397 in6_ifstat_inc(inifp, ifs6_in_hdrerr);
398 return;
399 }
400 }
401
402 ip6 = mtod(m, struct ip6_hdr *);
403
404 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
405 V_ip6stat.ip6s_badvers++;
406 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
407 goto bad;
408 }
409
410 V_ip6stat.ip6s_nxthist[ip6->ip6_nxt]++;
411
412 /*
413 * Check against address spoofing/corruption.
414 */
415 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src) ||
416 IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst)) {
417 /*
418 * XXX: "badscope" is not very suitable for a multicast source.
419 */
420 V_ip6stat.ip6s_badscope++;
421 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
422 goto bad;
423 }
424 if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) &&
425 !(m->m_flags & M_LOOP)) {
426 /*
427 * In this case, the packet should come from the loopback
428 * interface. However, we cannot just check the if_flags,
429 * because ip6_mloopback() passes the "actual" interface
430 * as the outgoing/incoming interface.
431 */
432 V_ip6stat.ip6s_badscope++;
433 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
434 goto bad;
435 }
436
437#ifdef ALTQ
438 if (altq_input != NULL && (*altq_input)(m, AF_INET6) == 0) {
439 /* packet is dropped by traffic conditioner */
440 return;
441 }
442#endif
443 /*
444 * The following check is not documented in specs. A malicious
445 * party may be able to use IPv4 mapped addr to confuse tcp/udp stack
446 * and bypass security checks (act as if it was from 127.0.0.1 by using
447 * IPv6 src ::ffff:127.0.0.1). Be cautious.
448 *
449 * This check chokes if we are in an SIIT cloud. As none of BSDs
450 * support IPv4-less kernel compilation, we cannot support SIIT
451 * environment at all. So, it makes more sense for us to reject any
452 * malicious packets for non-SIIT environment, than try to do a
453 * partial support for SIIT environment.
454 */
455 if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
456 IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
457 V_ip6stat.ip6s_badscope++;
458 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
459 goto bad;
460 }
461#if 0
462 /*
463 * Reject packets with IPv4 compatible addresses (auto tunnel).
464 *
465 * The code forbids auto tunnel relay case in RFC1933 (the check is
466 * stronger than RFC1933). We may want to re-enable it if mech-xx
467 * is revised to forbid relaying case.
468 */
469 if (IN6_IS_ADDR_V4COMPAT(&ip6->ip6_src) ||
470 IN6_IS_ADDR_V4COMPAT(&ip6->ip6_dst)) {
471 V_ip6stat.ip6s_badscope++;
472 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
473 goto bad;
474 }
475#endif
476
477 /*
478 * Run through list of hooks for input packets.
479 *
480 * NB: Beware of the destination address changing
481 * (e.g. by NAT rewriting). When this happens,
482 * tell ip6_forward to do the right thing.
483 */
484 odst = ip6->ip6_dst;
485
486 /* Jump over all PFIL processing if hooks are not active. */
487 if (!PFIL_HOOKED(&inet6_pfil_hook))
488 goto passin;
489
490 if (pfil_run_hooks(&inet6_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN, NULL))
491 return;
492 if (m == NULL) /* consumed by filter */
493 return;
494 ip6 = mtod(m, struct ip6_hdr *);
495 srcrt = !IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst);
496
497passin:
498 /*
499 * Disambiguate address scope zones (if there is ambiguity).
500 * We first make sure that the original source or destination address
501 * is not in our internal form for scoped addresses. Such addresses
502 * are not necessarily invalid spec-wise, but we cannot accept them due
503 * to the usage conflict.
504 * in6_setscope() then also checks and rejects the cases where src or
505 * dst are the loopback address and the receiving interface
506 * is not loopback.
507 */
508 if (in6_clearscope(&ip6->ip6_src) || in6_clearscope(&ip6->ip6_dst)) {
509 V_ip6stat.ip6s_badscope++; /* XXX */
510 goto bad;
511 }
512 if (in6_setscope(&ip6->ip6_src, m->m_pkthdr.rcvif, NULL) ||
513 in6_setscope(&ip6->ip6_dst, m->m_pkthdr.rcvif, NULL)) {
514 V_ip6stat.ip6s_badscope++;
515 goto bad;
516 }
517
518 /*
519 * Multicast check
520 */
521 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
522 struct in6_multi *in6m = 0;
523
524 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mcast);
525 /*
526 * See if we belong to the destination multicast group on the
527 * arrival interface.
528 */
529 IN6_LOOKUP_MULTI(ip6->ip6_dst, m->m_pkthdr.rcvif, in6m);
530 if (in6m)
531 ours = 1;
532 else if (!ip6_mrouter) {
533 V_ip6stat.ip6s_notmember++;
534 V_ip6stat.ip6s_cantforward++;
535 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
536 goto bad;
537 }
538 deliverifp = m->m_pkthdr.rcvif;
539 goto hbhcheck;
540 }
541
542 /*
543 * Unicast check
544 */
545 if (V_ip6_forward_rt.ro_rt != NULL &&
546 (V_ip6_forward_rt.ro_rt->rt_flags & RTF_UP) != 0 &&
547 IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
548 &((struct sockaddr_in6 *)(&V_ip6_forward_rt.ro_dst))->sin6_addr))
549 V_ip6stat.ip6s_forward_cachehit++;
550 else {
551 struct sockaddr_in6 *dst6;
552
553 if (V_ip6_forward_rt.ro_rt) {
554 /* route is down or destination is different */
555 V_ip6stat.ip6s_forward_cachemiss++;
556 RTFREE(V_ip6_forward_rt.ro_rt);
557 V_ip6_forward_rt.ro_rt = 0;
558 }
559
560 bzero(&V_ip6_forward_rt.ro_dst, sizeof(struct sockaddr_in6));
561 dst6 = (struct sockaddr_in6 *)&V_ip6_forward_rt.ro_dst;
562 dst6->sin6_len = sizeof(struct sockaddr_in6);
563 dst6->sin6_family = AF_INET6;
564 dst6->sin6_addr = ip6->ip6_dst;
565
566 rtalloc((struct route *)&V_ip6_forward_rt);
567 }
568
569#define rt6_key(r) ((struct sockaddr_in6 *)((r)->rt_nodes->rn_key))
570
571 /*
572 * Accept the packet if the forwarding interface to the destination
573 * according to the routing table is the loopback interface,
574 * unless the associated route has a gateway.
575 * Note that this approach causes to accept a packet if there is a
576 * route to the loopback interface for the destination of the packet.
577 * But we think it's even useful in some situations, e.g. when using
578 * a special daemon which wants to intercept the packet.
579 *
580 * XXX: some OSes automatically make a cloned route for the destination
581 * of an outgoing packet. If the outgoing interface of the packet
582 * is a loopback one, the kernel would consider the packet to be
583 * accepted, even if we have no such address assigned on the interface.
584 * We check the cloned flag of the route entry to reject such cases,
585 * assuming that route entries for our own addresses are not made by
586 * cloning (it should be true because in6_addloop explicitly installs
587 * the host route). However, we might have to do an explicit check
588 * while it would be less efficient. Or, should we rather install a
589 * reject route for such a case?
590 */
591 if (V_ip6_forward_rt.ro_rt &&
592 (V_ip6_forward_rt.ro_rt->rt_flags &
593 (RTF_HOST|RTF_GATEWAY)) == RTF_HOST &&
594#ifdef RTF_WASCLONED
595 !(V_ip6_forward_rt.ro_rt->rt_flags & RTF_WASCLONED) &&
596#endif
597#ifdef RTF_CLONED
598 !(V_ip6_forward_rt.ro_rt->rt_flags & RTF_CLONED) &&
599#endif
600#if 0
601 /*
602 * The check below is redundant since the comparison of
603 * the destination and the key of the rtentry has
604 * already done through looking up the routing table.
605 */
606 IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
607 &rt6_key(V_ip6_forward_rt.ro_rt)->sin6_addr)
608#endif
609 V_ip6_forward_rt.ro_rt->rt_ifp->if_type == IFT_LOOP) {
610 struct in6_ifaddr *ia6 =
611 (struct in6_ifaddr *)V_ip6_forward_rt.ro_rt->rt_ifa;
612
613 /*
614 * record address information into m_tag.
615 */
616 (void)ip6_setdstifaddr(m, ia6);
617
618 /*
619 * packets to a tentative, duplicated, or somehow invalid
620 * address must not be accepted.
621 */
622 if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) {
623 /* this address is ready */
624 ours = 1;
625 deliverifp = ia6->ia_ifp; /* correct? */
626 /* Count the packet in the ip address stats */
627 ia6->ia_ifa.if_ipackets++;
628 ia6->ia_ifa.if_ibytes += m->m_pkthdr.len;
629 goto hbhcheck;
630 } else {
631 char ip6bufs[INET6_ADDRSTRLEN];
632 char ip6bufd[INET6_ADDRSTRLEN];
633 /* address is not ready, so discard the packet. */
634 nd6log((LOG_INFO,
635 "ip6_input: packet to an unready address %s->%s\n",
636 ip6_sprintf(ip6bufs, &ip6->ip6_src),
637 ip6_sprintf(ip6bufd, &ip6->ip6_dst)));
638
639 goto bad;
640 }
641 }
642
643 /*
644 * FAITH (Firewall Aided Internet Translator)
645 */
646 if (V_ip6_keepfaith) {
647 if (V_ip6_forward_rt.ro_rt && V_ip6_forward_rt.ro_rt->rt_ifp
648 && V_ip6_forward_rt.ro_rt->rt_ifp->if_type == IFT_FAITH) {
649 /* XXX do we need more sanity checks? */
650 ours = 1;
651 deliverifp = V_ip6_forward_rt.ro_rt->rt_ifp; /* faith */
652 goto hbhcheck;
653 }
654 }
655
656 /*
657 * Now there is no reason to process the packet if it's not our own
658 * and we're not a router.
659 */
660 if (!V_ip6_forwarding) {
661 V_ip6stat.ip6s_cantforward++;
662 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
663 goto bad;
664 }
665
666 hbhcheck:
667 /*
668 * record address information into m_tag, if we don't have one yet.
669 * note that we are unable to record it, if the address is not listed
670 * as our interface address (e.g. multicast addresses, addresses
671 * within FAITH prefixes and such).
672 */
673 if (deliverifp && !ip6_getdstifaddr(m)) {
674 struct in6_ifaddr *ia6;
675
676 ia6 = in6_ifawithifp(deliverifp, &ip6->ip6_dst);
677 if (ia6) {
678 if (!ip6_setdstifaddr(m, ia6)) {
679 /*
680 * XXX maybe we should drop the packet here,
681 * as we could not provide enough information
682 * to the upper layers.
683 */
684 }
685 }
686 }
687
688 /*
689 * Process Hop-by-Hop options header if it's contained.
690 * m may be modified in ip6_hopopts_input().
691 * If a JumboPayload option is included, plen will also be modified.
692 */
693 plen = (u_int32_t)ntohs(ip6->ip6_plen);
694 if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
695 struct ip6_hbh *hbh;
696
697 if (ip6_hopopts_input(&plen, &rtalert, &m, &off)) {
698#if 0 /*touches NULL pointer*/
699 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
700#endif
701 return; /* m have already been freed */
702 }
703
704 /* adjust pointer */
705 ip6 = mtod(m, struct ip6_hdr *);
706
707 /*
708 * if the payload length field is 0 and the next header field
709 * indicates Hop-by-Hop Options header, then a Jumbo Payload
710 * option MUST be included.
711 */
712 if (ip6->ip6_plen == 0 && plen == 0) {
713 /*
714 * Note that if a valid jumbo payload option is
715 * contained, ip6_hopopts_input() must set a valid
716 * (non-zero) payload length to the variable plen.
717 */
718 V_ip6stat.ip6s_badoptions++;
719 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
720 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
721 icmp6_error(m, ICMP6_PARAM_PROB,
722 ICMP6_PARAMPROB_HEADER,
723 (caddr_t)&ip6->ip6_plen - (caddr_t)ip6);
724 return;
725 }
726#ifndef PULLDOWN_TEST
727 /* ip6_hopopts_input() ensures that mbuf is contiguous */
728 hbh = (struct ip6_hbh *)(ip6 + 1);
729#else
730 IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr),
731 sizeof(struct ip6_hbh));
732 if (hbh == NULL) {
733 V_ip6stat.ip6s_tooshort++;
734 return;
735 }
736#endif
737 nxt = hbh->ip6h_nxt;
738
739 /*
740 * If we are acting as a router and the packet contains a
741 * router alert option, see if we know the option value.
742 * Currently, we only support the option value for MLD, in which
743 * case we should pass the packet to the multicast routing
744 * daemon.
745 */
746 if (rtalert != ~0 && V_ip6_forwarding) {
747 switch (rtalert) {
748 case IP6OPT_RTALERT_MLD:
749 ours = 1;
750 break;
751 default:
752 /*
753 * RFC2711 requires unrecognized values must be
754 * silently ignored.
755 */
756 break;
757 }
758 }
759 } else
760 nxt = ip6->ip6_nxt;
761
762 /*
763 * Check that the amount of data in the buffers
764 * is at least as much as the IPv6 header would have us expect.
765 * Trim mbufs if longer than we expect.
766 * Drop packet if shorter than we expect.
767 */
768 if (m->m_pkthdr.len - sizeof(struct ip6_hdr) < plen) {
769 V_ip6stat.ip6s_tooshort++;
770 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
771 goto bad;
772 }
773 if (m->m_pkthdr.len > sizeof(struct ip6_hdr) + plen) {
774 if (m->m_len == m->m_pkthdr.len) {
775 m->m_len = sizeof(struct ip6_hdr) + plen;
776 m->m_pkthdr.len = sizeof(struct ip6_hdr) + plen;
777 } else
778 m_adj(m, sizeof(struct ip6_hdr) + plen - m->m_pkthdr.len);
779 }
780
781 /*
782 * Forward if desirable.
783 */
784 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
785 /*
786 * If we are acting as a multicast router, all
787 * incoming multicast packets are passed to the
788 * kernel-level multicast forwarding function.
789 * The packet is returned (relatively) intact; if
790 * ip6_mforward() returns a non-zero value, the packet
791 * must be discarded, else it may be accepted below.
792 */
793 if (ip6_mrouter && ip6_mforward &&
794 ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) {
795 V_ip6stat.ip6s_cantforward++;
796 m_freem(m);
797 return;
798 }
799 if (!ours) {
800 m_freem(m);
801 return;
802 }
803 } else if (!ours) {
804 ip6_forward(m, srcrt);
805 return;
806 }
807
808 ip6 = mtod(m, struct ip6_hdr *);
809
810 /*
811 * Malicious party may be able to use IPv4 mapped addr to confuse
812 * tcp/udp stack and bypass security checks (act as if it was from
813 * 127.0.0.1 by using IPv6 src ::ffff:127.0.0.1). Be cautious.
814 *
815 * For SIIT end node behavior, you may want to disable the check.
816 * However, you will become vulnerable to attacks using IPv4 mapped
817 * source.
818 */
819 if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
820 IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
821 V_ip6stat.ip6s_badscope++;
822 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
823 goto bad;
824 }
825
826 /*
827 * Tell launch routine the next header
828 */
829 V_ip6stat.ip6s_delivered++;
830 in6_ifstat_inc(deliverifp, ifs6_in_deliver);
831 nest = 0;
832
833 while (nxt != IPPROTO_DONE) {
834 if (V_ip6_hdrnestlimit && (++nest > V_ip6_hdrnestlimit)) {
835 V_ip6stat.ip6s_toomanyhdr++;
836 goto bad;
837 }
838
839 /*
840 * protection against faulty packet - there should be
841 * more sanity checks in header chain processing.
842 */
843 if (m->m_pkthdr.len < off) {
844 V_ip6stat.ip6s_tooshort++;
845 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
846 goto bad;
847 }
848
849#ifdef IPSEC
850 /*
851 * enforce IPsec policy checking if we are seeing last header.
852 * note that we do not visit this with protocols with pcb layer
853 * code - like udp/tcp/raw ip.
854 */
855 if (ip6_ipsec_input(m, nxt))
856 goto bad;
857#endif /* IPSEC */
858 nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt);
859 }
860 return;
861 bad:
862 m_freem(m);
863}
864
865/*
866 * set/grab in6_ifaddr correspond to IPv6 destination address.
867 * XXX backward compatibility wrapper
868 */
869static struct ip6aux *
870ip6_setdstifaddr(struct mbuf *m, struct in6_ifaddr *ia6)
871{
872 struct ip6aux *ip6a;
873
874 ip6a = ip6_addaux(m);
875 if (ip6a)
876 ip6a->ip6a_dstia6 = ia6;
877 return ip6a; /* NULL if failed to set */
878}
879
880struct in6_ifaddr *
881ip6_getdstifaddr(struct mbuf *m)
882{
883 struct ip6aux *ip6a;
884
885 ip6a = ip6_findaux(m);
886 if (ip6a)
887 return ip6a->ip6a_dstia6;
888 else
889 return NULL;
890}
891
892/*
893 * Hop-by-Hop options header processing. If a valid jumbo payload option is
894 * included, the real payload length will be stored in plenp.
895 *
896 * rtalertp - XXX: should be stored more smart way
897 */
898static int
899ip6_hopopts_input(u_int32_t *plenp, u_int32_t *rtalertp,
900 struct mbuf **mp, int *offp)
901{
902 INIT_VNET_INET6(curvnet);
903 struct mbuf *m = *mp;
904 int off = *offp, hbhlen;
905 struct ip6_hbh *hbh;
906 u_int8_t *opt;
907
908 /* validation of the length of the header */
909#ifndef PULLDOWN_TEST
910 IP6_EXTHDR_CHECK(m, off, sizeof(*hbh), -1);
911 hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off);
912 hbhlen = (hbh->ip6h_len + 1) << 3;
913
914 IP6_EXTHDR_CHECK(m, off, hbhlen, -1);
915 hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off);
916#else
917 IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m,
918 sizeof(struct ip6_hdr), sizeof(struct ip6_hbh));
919 if (hbh == NULL) {
920 V_ip6stat.ip6s_tooshort++;
921 return -1;
922 }
923 hbhlen = (hbh->ip6h_len + 1) << 3;
924 IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr),
925 hbhlen);
926 if (hbh == NULL) {
927 V_ip6stat.ip6s_tooshort++;
928 return -1;
929 }
930#endif
931 off += hbhlen;
932 hbhlen -= sizeof(struct ip6_hbh);
933 opt = (u_int8_t *)hbh + sizeof(struct ip6_hbh);
934
935 if (ip6_process_hopopts(m, (u_int8_t *)hbh + sizeof(struct ip6_hbh),
936 hbhlen, rtalertp, plenp) < 0)
937 return (-1);
938
939 *offp = off;
940 *mp = m;
941 return (0);
942}
943
944/*
945 * Search header for all Hop-by-hop options and process each option.
946 * This function is separate from ip6_hopopts_input() in order to
947 * handle a case where the sending node itself process its hop-by-hop
948 * options header. In such a case, the function is called from ip6_output().
949 *
950 * The function assumes that hbh header is located right after the IPv6 header
951 * (RFC2460 p7), opthead is pointer into data content in m, and opthead to
952 * opthead + hbhlen is located in continuous memory region.
953 */
954int
955ip6_process_hopopts(struct mbuf *m, u_int8_t *opthead, int hbhlen,
956 u_int32_t *rtalertp, u_int32_t *plenp)
957{
958 INIT_VNET_INET6(curvnet);
959 struct ip6_hdr *ip6;
960 int optlen = 0;
961 u_int8_t *opt = opthead;
962 u_int16_t rtalert_val;
963 u_int32_t jumboplen;
964 const int erroff = sizeof(struct ip6_hdr) + sizeof(struct ip6_hbh);
965
966 for (; hbhlen > 0; hbhlen -= optlen, opt += optlen) {
967 switch (*opt) {
968 case IP6OPT_PAD1:
969 optlen = 1;
970 break;
971 case IP6OPT_PADN:
972 if (hbhlen < IP6OPT_MINLEN) {
973 V_ip6stat.ip6s_toosmall++;
974 goto bad;
975 }
976 optlen = *(opt + 1) + 2;
977 break;
978 case IP6OPT_ROUTER_ALERT:
979 /* XXX may need check for alignment */
980 if (hbhlen < IP6OPT_RTALERT_LEN) {
981 V_ip6stat.ip6s_toosmall++;
982 goto bad;
983 }
984 if (*(opt + 1) != IP6OPT_RTALERT_LEN - 2) {
985 /* XXX stat */
986 icmp6_error(m, ICMP6_PARAM_PROB,
987 ICMP6_PARAMPROB_HEADER,
988 erroff + opt + 1 - opthead);
989 return (-1);
990 }
991 optlen = IP6OPT_RTALERT_LEN;
992 bcopy((caddr_t)(opt + 2), (caddr_t)&rtalert_val, 2);
993 *rtalertp = ntohs(rtalert_val);
994 break;
995 case IP6OPT_JUMBO:
996 /* XXX may need check for alignment */
997 if (hbhlen < IP6OPT_JUMBO_LEN) {
998 V_ip6stat.ip6s_toosmall++;
999 goto bad;
1000 }
1001 if (*(opt + 1) != IP6OPT_JUMBO_LEN - 2) {
1002 /* XXX stat */
1003 icmp6_error(m, ICMP6_PARAM_PROB,
1004 ICMP6_PARAMPROB_HEADER,
1005 erroff + opt + 1 - opthead);
1006 return (-1);
1007 }
1008 optlen = IP6OPT_JUMBO_LEN;
1009
1010 /*
1011 * IPv6 packets that have non 0 payload length
1012 * must not contain a jumbo payload option.
1013 */
1014 ip6 = mtod(m, struct ip6_hdr *);
1015 if (ip6->ip6_plen) {
1016 V_ip6stat.ip6s_badoptions++;
1017 icmp6_error(m, ICMP6_PARAM_PROB,
1018 ICMP6_PARAMPROB_HEADER,
1019 erroff + opt - opthead);
1020 return (-1);
1021 }
1022
1023 /*
1024 * We may see jumbolen in unaligned location, so
1025 * we'd need to perform bcopy().
1026 */
1027 bcopy(opt + 2, &jumboplen, sizeof(jumboplen));
1028 jumboplen = (u_int32_t)htonl(jumboplen);
1029
1030#if 1
1031 /*
1032 * if there are multiple jumbo payload options,
1033 * *plenp will be non-zero and the packet will be
1034 * rejected.
1035 * the behavior may need some debate in ipngwg -
1036 * multiple options does not make sense, however,
1037 * there's no explicit mention in specification.
1038 */
1039 if (*plenp != 0) {
1040 V_ip6stat.ip6s_badoptions++;
1041 icmp6_error(m, ICMP6_PARAM_PROB,
1042 ICMP6_PARAMPROB_HEADER,
1043 erroff + opt + 2 - opthead);
1044 return (-1);
1045 }
1046#endif
1047
1048 /*
1049 * jumbo payload length must be larger than 65535.
1050 */
1051 if (jumboplen <= IPV6_MAXPACKET) {
1052 V_ip6stat.ip6s_badoptions++;
1053 icmp6_error(m, ICMP6_PARAM_PROB,
1054 ICMP6_PARAMPROB_HEADER,
1055 erroff + opt + 2 - opthead);
1056 return (-1);
1057 }
1058 *plenp = jumboplen;
1059
1060 break;
1061 default: /* unknown option */
1062 if (hbhlen < IP6OPT_MINLEN) {
1063 V_ip6stat.ip6s_toosmall++;
1064 goto bad;
1065 }
1066 optlen = ip6_unknown_opt(opt, m,
1067 erroff + opt - opthead);
1068 if (optlen == -1)
1069 return (-1);
1070 optlen += 2;
1071 break;
1072 }
1073 }
1074
1075 return (0);
1076
1077 bad:
1078 m_freem(m);
1079 return (-1);
1080}
1081
1082/*
1083 * Unknown option processing.
1084 * The third argument `off' is the offset from the IPv6 header to the option,
1085 * which is necessary if the IPv6 header the and option header and IPv6 header
1086 * is not continuous in order to return an ICMPv6 error.
1087 */
1088int
1089ip6_unknown_opt(u_int8_t *optp, struct mbuf *m, int off)
1090{
1091 INIT_VNET_INET6(curvnet);
1092 struct ip6_hdr *ip6;
1093
1094 switch (IP6OPT_TYPE(*optp)) {
1095 case IP6OPT_TYPE_SKIP: /* ignore the option */
1096 return ((int)*(optp + 1));
1097 case IP6OPT_TYPE_DISCARD: /* silently discard */
1098 m_freem(m);
1099 return (-1);
1100 case IP6OPT_TYPE_FORCEICMP: /* send ICMP even if multicasted */
1101 V_ip6stat.ip6s_badoptions++;
1102 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off);
1103 return (-1);
1104 case IP6OPT_TYPE_ICMP: /* send ICMP if not multicasted */
1105 V_ip6stat.ip6s_badoptions++;
1106 ip6 = mtod(m, struct ip6_hdr *);
1107 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
1108 (m->m_flags & (M_BCAST|M_MCAST)))
1109 m_freem(m);
1110 else
1111 icmp6_error(m, ICMP6_PARAM_PROB,
1112 ICMP6_PARAMPROB_OPTION, off);
1113 return (-1);
1114 }
1115
1116 m_freem(m); /* XXX: NOTREACHED */
1117 return (-1);
1118}
1119
1120/*
1121 * Create the "control" list for this pcb.
1122 * These functions will not modify mbuf chain at all.
1123 *
1124 * With KAME mbuf chain restriction:
1125 * The routine will be called from upper layer handlers like tcp6_input().
1126 * Thus the routine assumes that the caller (tcp6_input) have already
1127 * called IP6_EXTHDR_CHECK() and all the extension headers are located in the
1128 * very first mbuf on the mbuf chain.
1129 *
1130 * ip6_savecontrol_v4 will handle those options that are possible to be
1131 * set on a v4-mapped socket.
1132 * ip6_savecontrol will directly call ip6_savecontrol_v4 to handle those
1133 * options and handle the v6-only ones itself.
1134 */
1135struct mbuf **
1136ip6_savecontrol_v4(struct inpcb *inp, struct mbuf *m, struct mbuf **mp,
1137 int *v4only)
1138{
1139 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1140
1141#ifdef SO_TIMESTAMP
1142 if ((inp->inp_socket->so_options & SO_TIMESTAMP) != 0) {
1143 struct timeval tv;
1144
1145 microtime(&tv);
1146 *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
1147 SCM_TIMESTAMP, SOL_SOCKET);
1148 if (*mp)
1149 mp = &(*mp)->m_next;
1150 }
1151#endif
1152
1153 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
1154 if (v4only != NULL)
1155 *v4only = 1;
1156 return (mp);
1157 }
1158
1159#define IS2292(inp, x, y) (((inp)->inp_flags & IN6P_RFC2292) ? (x) : (y))
1160 /* RFC 2292 sec. 5 */
1161 if ((inp->inp_flags & IN6P_PKTINFO) != 0) {
1162 struct in6_pktinfo pi6;
1163
1164 bcopy(&ip6->ip6_dst, &pi6.ipi6_addr, sizeof(struct in6_addr));
1165 in6_clearscope(&pi6.ipi6_addr); /* XXX */
1166 pi6.ipi6_ifindex =
1167 (m && m->m_pkthdr.rcvif) ? m->m_pkthdr.rcvif->if_index : 0;
1168
1169 *mp = sbcreatecontrol((caddr_t) &pi6,
1170 sizeof(struct in6_pktinfo),
1171 IS2292(inp, IPV6_2292PKTINFO, IPV6_PKTINFO), IPPROTO_IPV6);
1172 if (*mp)
1173 mp = &(*mp)->m_next;
1174 }
1175
1176 if ((inp->inp_flags & IN6P_HOPLIMIT) != 0) {
1177 int hlim = ip6->ip6_hlim & 0xff;
1178
1179 *mp = sbcreatecontrol((caddr_t) &hlim, sizeof(int),
1180 IS2292(inp, IPV6_2292HOPLIMIT, IPV6_HOPLIMIT),
1181 IPPROTO_IPV6);
1182 if (*mp)
1183 mp = &(*mp)->m_next;
1184 }
1185
1186 if (v4only != NULL)
1187 *v4only = 0;
1188 return (mp);
1189}
1190
1191void
1192ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
1193{
1194 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1195 int v4only = 0;
1196
1197 mp = ip6_savecontrol_v4(in6p, m, mp, &v4only);
1198 if (v4only)
1199 return;
1200
1201 if ((in6p->in6p_flags & IN6P_TCLASS) != 0) {
1202 u_int32_t flowinfo;
1203 int tclass;
1204
1205 flowinfo = (u_int32_t)ntohl(ip6->ip6_flow & IPV6_FLOWINFO_MASK);
1206 flowinfo >>= 20;
1207
1208 tclass = flowinfo & 0xff;
1209 *mp = sbcreatecontrol((caddr_t) &tclass, sizeof(tclass),
1210 IPV6_TCLASS, IPPROTO_IPV6);
1211 if (*mp)
1212 mp = &(*mp)->m_next;
1213 }
1214
1215 /*
1216 * IPV6_HOPOPTS socket option. Recall that we required super-user
1217 * privilege for the option (see ip6_ctloutput), but it might be too
1218 * strict, since there might be some hop-by-hop options which can be
1219 * returned to normal user.
1220 * See also RFC 2292 section 6 (or RFC 3542 section 8).
1221 */
1222 if ((in6p->in6p_flags & IN6P_HOPOPTS) != 0) {
1223 /*
1224 * Check if a hop-by-hop options header is contatined in the
1225 * received packet, and if so, store the options as ancillary
1226 * data. Note that a hop-by-hop options header must be
1227 * just after the IPv6 header, which is assured through the
1228 * IPv6 input processing.
1229 */
1230 if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
1231 struct ip6_hbh *hbh;
1232 int hbhlen = 0;
1233#ifdef PULLDOWN_TEST
1234 struct mbuf *ext;
1235#endif
1236
1237#ifndef PULLDOWN_TEST
1238 hbh = (struct ip6_hbh *)(ip6 + 1);
1239 hbhlen = (hbh->ip6h_len + 1) << 3;
1240#else
1241 ext = ip6_pullexthdr(m, sizeof(struct ip6_hdr),
1242 ip6->ip6_nxt);
1243 if (ext == NULL) {
1244 V_ip6stat.ip6s_tooshort++;
1245 return;
1246 }
1247 hbh = mtod(ext, struct ip6_hbh *);
1248 hbhlen = (hbh->ip6h_len + 1) << 3;
1249 if (hbhlen != ext->m_len) {
1250 m_freem(ext);
1251 V_ip6stat.ip6s_tooshort++;
1252 return;
1253 }
1254#endif
1255
1256 /*
1257 * XXX: We copy the whole header even if a
1258 * jumbo payload option is included, the option which
1259 * is to be removed before returning according to
1260 * RFC2292.
1261 * Note: this constraint is removed in RFC3542
1262 */
1263 *mp = sbcreatecontrol((caddr_t)hbh, hbhlen,
1264 IS2292(in6p, IPV6_2292HOPOPTS, IPV6_HOPOPTS),
1265 IPPROTO_IPV6);
1266 if (*mp)
1267 mp = &(*mp)->m_next;
1268#ifdef PULLDOWN_TEST
1269 m_freem(ext);
1270#endif
1271 }
1272 }
1273
1274 if ((in6p->in6p_flags & (IN6P_RTHDR | IN6P_DSTOPTS)) != 0) {
1275 int nxt = ip6->ip6_nxt, off = sizeof(struct ip6_hdr);
1276
1277 /*
1278 * Search for destination options headers or routing
1279 * header(s) through the header chain, and stores each
1280 * header as ancillary data.
1281 * Note that the order of the headers remains in
1282 * the chain of ancillary data.
1283 */
1284 while (1) { /* is explicit loop prevention necessary? */
1285 struct ip6_ext *ip6e = NULL;
1286 int elen;
1287#ifdef PULLDOWN_TEST
1288 struct mbuf *ext = NULL;
1289#endif
1290
1291 /*
1292 * if it is not an extension header, don't try to
1293 * pull it from the chain.
1294 */
1295 switch (nxt) {
1296 case IPPROTO_DSTOPTS:
1297 case IPPROTO_ROUTING:
1298 case IPPROTO_HOPOPTS:
1299 case IPPROTO_AH: /* is it possible? */
1300 break;
1301 default:
1302 goto loopend;
1303 }
1304
1305#ifndef PULLDOWN_TEST
1306 if (off + sizeof(*ip6e) > m->m_len)
1307 goto loopend;
1308 ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + off);
1309 if (nxt == IPPROTO_AH)
1310 elen = (ip6e->ip6e_len + 2) << 2;
1311 else
1312 elen = (ip6e->ip6e_len + 1) << 3;
1313 if (off + elen > m->m_len)
1314 goto loopend;
1315#else
1316 ext = ip6_pullexthdr(m, off, nxt);
1317 if (ext == NULL) {
1318 V_ip6stat.ip6s_tooshort++;
1319 return;
1320 }
1321 ip6e = mtod(ext, struct ip6_ext *);
1322 if (nxt == IPPROTO_AH)
1323 elen = (ip6e->ip6e_len + 2) << 2;
1324 else
1325 elen = (ip6e->ip6e_len + 1) << 3;
1326 if (elen != ext->m_len) {
1327 m_freem(ext);
1328 V_ip6stat.ip6s_tooshort++;
1329 return;
1330 }
1331#endif
1332
1333 switch (nxt) {
1334 case IPPROTO_DSTOPTS:
1335 if (!(in6p->in6p_flags & IN6P_DSTOPTS))
1336 break;
1337
1338 *mp = sbcreatecontrol((caddr_t)ip6e, elen,
1339 IS2292(in6p,
1340 IPV6_2292DSTOPTS, IPV6_DSTOPTS),
1341 IPPROTO_IPV6);
1342 if (*mp)
1343 mp = &(*mp)->m_next;
1344 break;
1345 case IPPROTO_ROUTING:
1346 if (!in6p->in6p_flags & IN6P_RTHDR)
1347 break;
1348
1349 *mp = sbcreatecontrol((caddr_t)ip6e, elen,
1350 IS2292(in6p, IPV6_2292RTHDR, IPV6_RTHDR),
1351 IPPROTO_IPV6);
1352 if (*mp)
1353 mp = &(*mp)->m_next;
1354 break;
1355 case IPPROTO_HOPOPTS:
1356 case IPPROTO_AH: /* is it possible? */
1357 break;
1358
1359 default:
1360 /*
1361 * other cases have been filtered in the above.
1362 * none will visit this case. here we supply
1363 * the code just in case (nxt overwritten or
1364 * other cases).
1365 */
1366#ifdef PULLDOWN_TEST
1367 m_freem(ext);
1368#endif
1369 goto loopend;
1370
1371 }
1372
1373 /* proceed with the next header. */
1374 off += elen;
1375 nxt = ip6e->ip6e_nxt;
1376 ip6e = NULL;
1377#ifdef PULLDOWN_TEST
1378 m_freem(ext);
1379 ext = NULL;
1380#endif
1381 }
1382 loopend:
1383 ;
1384 }
1385}
1386#undef IS2292
1387
1388void
1389ip6_notify_pmtu(struct inpcb *in6p, struct sockaddr_in6 *dst, u_int32_t *mtu)
1390{
1391 struct socket *so;
1392 struct mbuf *m_mtu;
1393 struct ip6_mtuinfo mtuctl;
1394
1395 so = in6p->inp_socket;
1396
1397 if (mtu == NULL)
1398 return;
1399
1400#ifdef DIAGNOSTIC
1401 if (so == NULL) /* I believe this is impossible */
1402 panic("ip6_notify_pmtu: socket is NULL");
1403#endif
1404
1405 bzero(&mtuctl, sizeof(mtuctl)); /* zero-clear for safety */
1406 mtuctl.ip6m_mtu = *mtu;
1407 mtuctl.ip6m_addr = *dst;
1408 if (sa6_recoverscope(&mtuctl.ip6m_addr))
1409 return;
1410
1411 if ((m_mtu = sbcreatecontrol((caddr_t)&mtuctl, sizeof(mtuctl),
1412 IPV6_PATHMTU, IPPROTO_IPV6)) == NULL)
1413 return;
1414
1415 if (sbappendaddr(&so->so_rcv, (struct sockaddr *)dst, NULL, m_mtu)
1416 == 0) {
1417 m_freem(m_mtu);
1418 /* XXX: should count statistics */
1419 } else
1420 sorwakeup(so);
1421
1422 return;
1423}
1424
#ifdef PULLDOWN_TEST
/*
 * Copy a single extension header out of the mbuf chain.
 *
 * The header starts `off' bytes into `m'; `nxt' is its protocol number and
 * only decides how the length field is scaled (4-byte units for AH, 8-byte
 * units otherwise).  Returns a freshly allocated single mbuf holding
 * exactly the header, or NULL if no mbuf (or cluster) could be obtained or
 * the header does not fit into it.  `m' itself is only read.
 */
static struct mbuf *
ip6_pullexthdr(struct mbuf *m, size_t off, int nxt)
{
	struct ip6_ext hdr;
	struct mbuf *copy;
	size_t hdrlen;

#ifdef DIAGNOSTIC
	if (nxt != IPPROTO_DSTOPTS && nxt != IPPROTO_ROUTING &&
	    nxt != IPPROTO_HOPOPTS && nxt != IPPROTO_AH /* is it possible? */)
		printf("ip6_pullexthdr: invalid nxt=%d\n", nxt);
#endif

	/* Peek at the generic header to learn the total header length. */
	m_copydata(m, off, sizeof(hdr), (caddr_t)&hdr);
	hdrlen = (nxt == IPPROTO_AH) ?
	    (hdr.ip6e_len + 2) << 2 : (hdr.ip6e_len + 1) << 3;

	MGET(copy, M_DONTWAIT, MT_DATA);
	if (copy == NULL)
		return (NULL);
	if (hdrlen >= MLEN) {
		/* Too big for a plain mbuf: try to attach a cluster. */
		MCLGET(copy, M_DONTWAIT);
		if ((copy->m_flags & M_EXT) == 0) {
			m_free(copy);
			return (NULL);
		}
	}

	copy->m_len = 0;
	if (hdrlen >= M_TRAILINGSPACE(copy)) {
		m_free(copy);
		return (NULL);
	}

	m_copydata(m, off, hdrlen, mtod(copy, caddr_t));
	copy->m_len = hdrlen;
	return (copy);
}
#endif
1477
1478/*
1479 * Get pointer to the previous header followed by the header
1480 * currently processed.
1481 * XXX: This function supposes that
1482 * M includes all headers,
1483 * the next header field and the header length field of each header
1484 * are valid, and
1485 * the sum of each header length equals to OFF.
1486 * Because of these assumptions, this function must be called very
1487 * carefully. Moreover, it will not be used in the near future when
1488 * we develop `neater' mechanism to process extension headers.
1489 */
1490char *
1491ip6_get_prevhdr(struct mbuf *m, int off)
1492{
1493 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1494
1495 if (off == sizeof(struct ip6_hdr))
1496 return (&ip6->ip6_nxt);
1497 else {
1498 int len, nxt;
1499 struct ip6_ext *ip6e = NULL;
1500
1501 nxt = ip6->ip6_nxt;
1502 len = sizeof(struct ip6_hdr);
1503 while (len < off) {
1504 ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + len);
1505
1506 switch (nxt) {
1507 case IPPROTO_FRAGMENT:
1508 len += sizeof(struct ip6_frag);
1509 break;
1510 case IPPROTO_AH:
1511 len += (ip6e->ip6e_len + 2) << 2;
1512 break;
1513 default:
1514 len += (ip6e->ip6e_len + 1) << 3;
1515 break;
1516 }
1517 nxt = ip6e->ip6e_nxt;
1518 }
1519 if (ip6e)
1520 return (&ip6e->ip6e_nxt);
1521 else
1522 return NULL;
1523 }
1524}
1525
1526/*
1527 * get next header offset. m will be retained.
1528 */
1529int
1530ip6_nexthdr(struct mbuf *m, int off, int proto, int *nxtp)
1531{
1532 struct ip6_hdr ip6;
1533 struct ip6_ext ip6e;
1534 struct ip6_frag fh;
1535
1536 /* just in case */
1537 if (m == NULL)
1538 panic("ip6_nexthdr: m == NULL");
1539 if ((m->m_flags & M_PKTHDR) == 0 || m->m_pkthdr.len < off)
1540 return -1;
1541
1542 switch (proto) {
1543 case IPPROTO_IPV6:
1544 if (m->m_pkthdr.len < off + sizeof(ip6))
1545 return -1;
1546 m_copydata(m, off, sizeof(ip6), (caddr_t)&ip6);
1547 if (nxtp)
1548 *nxtp = ip6.ip6_nxt;
1549 off += sizeof(ip6);
1550 return off;
1551
1552 case IPPROTO_FRAGMENT:
1553 /*
1554 * terminate parsing if it is not the first fragment,
1555 * it does not make sense to parse through it.
1556 */
1557 if (m->m_pkthdr.len < off + sizeof(fh))
1558 return -1;
1559 m_copydata(m, off, sizeof(fh), (caddr_t)&fh);
1560 /* IP6F_OFF_MASK = 0xfff8(BigEndian), 0xf8ff(LittleEndian) */
1561 if (fh.ip6f_offlg & IP6F_OFF_MASK)
1562 return -1;
1563 if (nxtp)
1564 *nxtp = fh.ip6f_nxt;
1565 off += sizeof(struct ip6_frag);
1566 return off;
1567
1568 case IPPROTO_AH:
1569 if (m->m_pkthdr.len < off + sizeof(ip6e))
1570 return -1;
1571 m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
1572 if (nxtp)
1573 *nxtp = ip6e.ip6e_nxt;
1574 off += (ip6e.ip6e_len + 2) << 2;
1575 return off;
1576
1577 case IPPROTO_HOPOPTS:
1578 case IPPROTO_ROUTING:
1579 case IPPROTO_DSTOPTS:
1580 if (m->m_pkthdr.len < off + sizeof(ip6e))
1581 return -1;
1582 m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
1583 if (nxtp)
1584 *nxtp = ip6e.ip6e_nxt;
1585 off += (ip6e.ip6e_len + 1) << 3;
1586 return off;
1587
1588 case IPPROTO_NONE:
1589 case IPPROTO_ESP:
1590 case IPPROTO_IPCOMP:
1591 /* give up */
1592 return -1;
1593
1594 default:
1595 return -1;
1596 }
1597
1598 return -1;
1599}
1600
/*
 * Get the offset of the last header in the chain.  m will be kept
 * untainted.
 *
 * Starting with the header of type `proto' at `off', ip6_nexthdr() is
 * applied repeatedly until it can no longer advance; the offset reached at
 * that point is returned.  If `nxtp' is not NULL it is updated by each
 * ip6_nexthdr() call.  Returns -1 if ip6_nexthdr() ever reports a
 * non-negative offset that lies before the current one.
 */
int
ip6_lasthdr(struct mbuf *m, int off, int proto, int *nxtp)
{
	int scratch = -1;

	/* The walk needs somewhere to store the next-header value. */
	if (nxtp == NULL)
		nxtp = &scratch;

	for (;;) {
		int advanced = ip6_nexthdr(m, off, proto, nxtp);

		if (advanced < 0)
			return (off);		/* cannot step any further */
		if (advanced < off)
			return (-1);		/* invalid */
		if (advanced == off)
			return (advanced);

		off = advanced;
		proto = *nxtp;
	}
}
1627
1628struct ip6aux *
1629ip6_addaux(struct mbuf *m)
1630{
1631 struct m_tag *mtag;
1632
1633 mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
1634 if (!mtag) {
1635 mtag = m_tag_get(PACKET_TAG_IPV6_INPUT, sizeof(struct ip6aux),
1636 M_NOWAIT);
1637 if (mtag) {
1638 m_tag_prepend(m, mtag);
1639 bzero(mtag + 1, sizeof(struct ip6aux));
1640 }
1641 }
1642 return mtag ? (struct ip6aux *)(mtag + 1) : NULL;
1643}
1644
1645struct ip6aux *
1646ip6_findaux(struct mbuf *m)
1647{
1648 struct m_tag *mtag;
1649
1650 mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
1651 return mtag ? (struct ip6aux *)(mtag + 1) : NULL;
1652}
1653
1654void
1655ip6_delaux(struct mbuf *m)
1656{
1657 struct m_tag *mtag;
1658
1659 mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
1660 if (mtag)
1661 m_tag_delete(m, mtag);
1662}
1663
1664/*
1665 * System control for IP6
1666 */
1667
1668u_char inet6ctlerrmap[PRC_NCMDS] = {
1669 0, 0, 0, 0,
1670 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH,
1671 EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED,
1672 EMSGSIZE, EHOSTUNREACH, 0, 0,
1673 0, 0, 0, 0,
1674 ENOPROTOOPT
1675};
183
184#ifndef IPV6FORWARDING
185#ifdef GATEWAY6
186#define IPV6FORWARDING 1 /* forward IP6 packets not for us */
187#else
188#define IPV6FORWARDING 0 /* don't forward IP6 packets not for us */
189#endif /* GATEWAY6 */
190#endif /* !IPV6FORWARDING */
191
192#ifndef IPV6_SENDREDIRECTS
193#define IPV6_SENDREDIRECTS 1
194#endif
195
196 V_ip6_forwarding = IPV6FORWARDING; /* act as router? */
197 V_ip6_sendredirects = IPV6_SENDREDIRECTS;
198 V_ip6_defhlim = IPV6_DEFHLIM;
199 V_ip6_defmcasthlim = IPV6_DEFAULT_MULTICAST_HOPS;
200 V_ip6_accept_rtadv = 0; /* "IPV6FORWARDING ? 0 : 1" is dangerous */
201 V_ip6_log_interval = 5;
202 V_ip6_hdrnestlimit = 15; /* How many header options will we process? */
203 V_ip6_dad_count = 1; /* DupAddrDetectionTransmits */
204 V_ip6_auto_flowlabel = 1;
205 V_ip6_use_deprecated = 1;/* allow deprecated addr (RFC2462 5.5.4) */
206 V_ip6_rr_prune = 5; /* router renumbering prefix
207 * walk list every 5 sec. */
208 V_ip6_mcast_pmtu = 0; /* enable pMTU discovery for multicast? */
209 V_ip6_v6only = 1;
210 V_ip6_keepfaith = 0;
211 V_ip6_log_time = (time_t)0L;
212#ifdef IPSTEALTH
213 V_ip6stealth = 0;
214#endif
215 V_nd6_onlink_ns_rfc4861 = 0; /* allow 'on-link' nd6 NS (RFC 4861) */
216
217 V_pmtu_expire = 60*10;
218 V_pmtu_probe = 60*2;
219
220 /* raw IP6 parameters */
221 /*
222 * Nominal space allocated to a raw ip socket.
223 */
224#define RIPV6SNDQ 8192
225#define RIPV6RCVQ 8192
226 V_rip6_sendspace = RIPV6SNDQ;
227 V_rip6_recvspace = RIPV6RCVQ;
228
229 /* ICMPV6 parameters */
230 V_icmp6_rediraccept = 1; /* accept and process redirects */
231 V_icmp6_redirtimeout = 10 * 60; /* 10 minutes */
232 V_icmp6errppslim = 100; /* 100pps */
233 /* control how to respond to NI queries */
234 V_icmp6_nodeinfo = (ICMP6_NODEINFO_FQDNOK|ICMP6_NODEINFO_NODEADDROK);
235
236 /* UDP on IP6 parameters */
237 V_udp6_sendspace = 9216; /* really max datagram size */
238 V_udp6_recvspace = 40 * (1024 + sizeof(struct sockaddr_in6));
239 /* 40 1K datagrams */
240 V_dad_init = 0;
241
242#ifdef DIAGNOSTIC
243 if (sizeof(struct protosw) != sizeof(struct ip6protosw))
244 panic("sizeof(protosw) != sizeof(ip6protosw)");
245#endif
246 pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
247 if (pr == 0)
248 panic("ip6_init");
249
250 /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */
251 for (i = 0; i < IPPROTO_MAX; i++)
252 ip6_protox[i] = pr - inet6sw;
253 /*
254 * Cycle through IP protocols and put them into the appropriate place
255 * in ip6_protox[].
256 */
257 for (pr = (struct ip6protosw *)inet6domain.dom_protosw;
258 pr < (struct ip6protosw *)inet6domain.dom_protoswNPROTOSW; pr++)
259 if (pr->pr_domain->dom_family == PF_INET6 &&
260 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
261 /* Be careful to only index valid IP protocols. */
262 if (pr->pr_protocol < IPPROTO_MAX)
263 ip6_protox[pr->pr_protocol] = pr - inet6sw;
264 }
265
266 /* Initialize packet filter hooks. */
267 inet6_pfil_hook.ph_type = PFIL_TYPE_AF;
268 inet6_pfil_hook.ph_af = AF_INET6;
269 if ((i = pfil_head_register(&inet6_pfil_hook)) != 0)
270 printf("%s: WARNING: unable to register pfil hook, "
271 "error %d\n", __func__, i);
272
273 ip6intrq.ifq_maxlen = V_ip6qmaxlen;
274 mtx_init(&ip6intrq.ifq_mtx, "ip6_inq", NULL, MTX_DEF);
275 netisr_register(NETISR_IPV6, ip6_input, &ip6intrq, 0);
276 scope6_init();
277 addrsel_policy_init();
278 nd6_init();
279 frag6_init();
280 V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR;
281}
282
283static void
284ip6_init2(void *dummy)
285{
286 INIT_VNET_INET6(curvnet);
287
288 /* nd6_timer_init */
289 callout_init(&V_nd6_timer_ch, 0);
290 callout_reset(&V_nd6_timer_ch, hz, nd6_timer, NULL);
291
292 /* timer for regeneranation of temporary addresses randomize ID */
293 callout_init(&V_in6_tmpaddrtimer_ch, 0);
294 callout_reset(&V_in6_tmpaddrtimer_ch,
295 (V_ip6_temp_preferred_lifetime - V_ip6_desync_factor -
296 V_ip6_temp_regen_advance) * hz,
297 in6_tmpaddrtimer, NULL);
298}
299
300/* cheat */
301/* This must be after route_init(), which is now SI_ORDER_THIRD */
302SYSINIT(netinet6init2, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ip6_init2, NULL);
303
304extern struct route_in6 ip6_forward_rt;
305
306void
307ip6_input(struct mbuf *m)
308{
309 INIT_VNET_NET(curvnet);
310 INIT_VNET_INET6(curvnet);
311 struct ip6_hdr *ip6;
312 int off = sizeof(struct ip6_hdr), nest;
313 u_int32_t plen;
314 u_int32_t rtalert = ~0;
315 int nxt, ours = 0;
316 struct ifnet *deliverifp = NULL;
317 struct in6_addr odst;
318 int srcrt = 0;
319
320#ifdef IPSEC
321 /*
322 * should the inner packet be considered authentic?
323 * see comment in ah4_input().
324 * NB: m cannot be NULL when passed to the input routine
325 */
326
327 m->m_flags &= ~M_AUTHIPHDR;
328 m->m_flags &= ~M_AUTHIPDGM;
329
330#endif /* IPSEC */
331
332 /*
333 * make sure we don't have onion peering information into m_tag.
334 */
335 ip6_delaux(m);
336
337 /*
338 * mbuf statistics
339 */
340 if (m->m_flags & M_EXT) {
341 if (m->m_next)
342 V_ip6stat.ip6s_mext2m++;
343 else
344 V_ip6stat.ip6s_mext1++;
345 } else {
346#define M2MMAX (sizeof(V_ip6stat.ip6s_m2m)/sizeof(V_ip6stat.ip6s_m2m[0]))
347 if (m->m_next) {
348 if (m->m_flags & M_LOOP) {
349 V_ip6stat.ip6s_m2m[V_loif[0].if_index]++; /* XXX */
350 } else if (m->m_pkthdr.rcvif->if_index < M2MMAX)
351 V_ip6stat.ip6s_m2m[m->m_pkthdr.rcvif->if_index]++;
352 else
353 V_ip6stat.ip6s_m2m[0]++;
354 } else
355 V_ip6stat.ip6s_m1++;
356#undef M2MMAX
357 }
358
359 /* drop the packet if IPv6 operation is disabled on the IF */
360 if ((ND_IFINFO(m->m_pkthdr.rcvif)->flags & ND6_IFF_IFDISABLED)) {
361 m_freem(m);
362 return;
363 }
364
365 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_receive);
366 V_ip6stat.ip6s_total++;
367
368#ifndef PULLDOWN_TEST
369 /*
370 * L2 bridge code and some other code can return mbuf chain
371 * that does not conform to KAME requirement. too bad.
372 * XXX: fails to join if interface MTU > MCLBYTES. jumbogram?
373 */
374 if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) {
375 struct mbuf *n;
376
377 MGETHDR(n, M_DONTWAIT, MT_HEADER);
378 if (n)
379 M_MOVE_PKTHDR(n, m);
380 if (n && n->m_pkthdr.len > MHLEN) {
381 MCLGET(n, M_DONTWAIT);
382 if ((n->m_flags & M_EXT) == 0) {
383 m_freem(n);
384 n = NULL;
385 }
386 }
387 if (n == NULL) {
388 m_freem(m);
389 return; /* ENOBUFS */
390 }
391
392 m_copydata(m, 0, n->m_pkthdr.len, mtod(n, caddr_t));
393 n->m_len = n->m_pkthdr.len;
394 m_freem(m);
395 m = n;
396 }
397 IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), /* nothing */);
398#endif
399
400 if (m->m_len < sizeof(struct ip6_hdr)) {
401 struct ifnet *inifp;
402 inifp = m->m_pkthdr.rcvif;
403 if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
404 V_ip6stat.ip6s_toosmall++;
405 in6_ifstat_inc(inifp, ifs6_in_hdrerr);
406 return;
407 }
408 }
409
410 ip6 = mtod(m, struct ip6_hdr *);
411
412 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
413 V_ip6stat.ip6s_badvers++;
414 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
415 goto bad;
416 }
417
418 V_ip6stat.ip6s_nxthist[ip6->ip6_nxt]++;
419
420 /*
421 * Check against address spoofing/corruption.
422 */
423 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src) ||
424 IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst)) {
425 /*
426 * XXX: "badscope" is not very suitable for a multicast source.
427 */
428 V_ip6stat.ip6s_badscope++;
429 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
430 goto bad;
431 }
432 if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) &&
433 !(m->m_flags & M_LOOP)) {
434 /*
435 * In this case, the packet should come from the loopback
436 * interface. However, we cannot just check the if_flags,
437 * because ip6_mloopback() passes the "actual" interface
438 * as the outgoing/incoming interface.
439 */
440 V_ip6stat.ip6s_badscope++;
441 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
442 goto bad;
443 }
444
445#ifdef ALTQ
446 if (altq_input != NULL && (*altq_input)(m, AF_INET6) == 0) {
447 /* packet is dropped by traffic conditioner */
448 return;
449 }
450#endif
451 /*
452 * The following check is not documented in specs. A malicious
453 * party may be able to use IPv4 mapped addr to confuse tcp/udp stack
454 * and bypass security checks (act as if it was from 127.0.0.1 by using
455 * IPv6 src ::ffff:127.0.0.1). Be cautious.
456 *
457 * This check chokes if we are in an SIIT cloud. As none of BSDs
458 * support IPv4-less kernel compilation, we cannot support SIIT
459 * environment at all. So, it makes more sense for us to reject any
460 * malicious packets for non-SIIT environment, than try to do a
461 * partial support for SIIT environment.
462 */
463 if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
464 IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
465 V_ip6stat.ip6s_badscope++;
466 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
467 goto bad;
468 }
469#if 0
470 /*
471 * Reject packets with IPv4 compatible addresses (auto tunnel).
472 *
473 * The code forbids auto tunnel relay case in RFC1933 (the check is
474 * stronger than RFC1933). We may want to re-enable it if mech-xx
475 * is revised to forbid relaying case.
476 */
477 if (IN6_IS_ADDR_V4COMPAT(&ip6->ip6_src) ||
478 IN6_IS_ADDR_V4COMPAT(&ip6->ip6_dst)) {
479 V_ip6stat.ip6s_badscope++;
480 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
481 goto bad;
482 }
483#endif
484
485 /*
486 * Run through list of hooks for input packets.
487 *
488 * NB: Beware of the destination address changing
489 * (e.g. by NAT rewriting). When this happens,
490 * tell ip6_forward to do the right thing.
491 */
492 odst = ip6->ip6_dst;
493
494 /* Jump over all PFIL processing if hooks are not active. */
495 if (!PFIL_HOOKED(&inet6_pfil_hook))
496 goto passin;
497
498 if (pfil_run_hooks(&inet6_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN, NULL))
499 return;
500 if (m == NULL) /* consumed by filter */
501 return;
502 ip6 = mtod(m, struct ip6_hdr *);
503 srcrt = !IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst);
504
505passin:
506 /*
507 * Disambiguate address scope zones (if there is ambiguity).
508 * We first make sure that the original source or destination address
509 * is not in our internal form for scoped addresses. Such addresses
510 * are not necessarily invalid spec-wise, but we cannot accept them due
511 * to the usage conflict.
512 * in6_setscope() then also checks and rejects the cases where src or
513 * dst are the loopback address and the receiving interface
514 * is not loopback.
515 */
516 if (in6_clearscope(&ip6->ip6_src) || in6_clearscope(&ip6->ip6_dst)) {
517 V_ip6stat.ip6s_badscope++; /* XXX */
518 goto bad;
519 }
520 if (in6_setscope(&ip6->ip6_src, m->m_pkthdr.rcvif, NULL) ||
521 in6_setscope(&ip6->ip6_dst, m->m_pkthdr.rcvif, NULL)) {
522 V_ip6stat.ip6s_badscope++;
523 goto bad;
524 }
525
526 /*
527 * Multicast check
528 */
529 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
530 struct in6_multi *in6m = 0;
531
532 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mcast);
533 /*
534 * See if we belong to the destination multicast group on the
535 * arrival interface.
536 */
537 IN6_LOOKUP_MULTI(ip6->ip6_dst, m->m_pkthdr.rcvif, in6m);
538 if (in6m)
539 ours = 1;
540 else if (!ip6_mrouter) {
541 V_ip6stat.ip6s_notmember++;
542 V_ip6stat.ip6s_cantforward++;
543 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
544 goto bad;
545 }
546 deliverifp = m->m_pkthdr.rcvif;
547 goto hbhcheck;
548 }
549
550 /*
551 * Unicast check
552 */
553 if (V_ip6_forward_rt.ro_rt != NULL &&
554 (V_ip6_forward_rt.ro_rt->rt_flags & RTF_UP) != 0 &&
555 IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
556 &((struct sockaddr_in6 *)(&V_ip6_forward_rt.ro_dst))->sin6_addr))
557 V_ip6stat.ip6s_forward_cachehit++;
558 else {
559 struct sockaddr_in6 *dst6;
560
561 if (V_ip6_forward_rt.ro_rt) {
562 /* route is down or destination is different */
563 V_ip6stat.ip6s_forward_cachemiss++;
564 RTFREE(V_ip6_forward_rt.ro_rt);
565 V_ip6_forward_rt.ro_rt = 0;
566 }
567
568 bzero(&V_ip6_forward_rt.ro_dst, sizeof(struct sockaddr_in6));
569 dst6 = (struct sockaddr_in6 *)&V_ip6_forward_rt.ro_dst;
570 dst6->sin6_len = sizeof(struct sockaddr_in6);
571 dst6->sin6_family = AF_INET6;
572 dst6->sin6_addr = ip6->ip6_dst;
573
574 rtalloc((struct route *)&V_ip6_forward_rt);
575 }
576
577#define rt6_key(r) ((struct sockaddr_in6 *)((r)->rt_nodes->rn_key))
578
579 /*
580 * Accept the packet if the forwarding interface to the destination
581 * according to the routing table is the loopback interface,
582 * unless the associated route has a gateway.
583 * Note that this approach causes to accept a packet if there is a
584 * route to the loopback interface for the destination of the packet.
585 * But we think it's even useful in some situations, e.g. when using
586 * a special daemon which wants to intercept the packet.
587 *
588 * XXX: some OSes automatically make a cloned route for the destination
589 * of an outgoing packet. If the outgoing interface of the packet
590 * is a loopback one, the kernel would consider the packet to be
591 * accepted, even if we have no such address assigned on the interface.
592 * We check the cloned flag of the route entry to reject such cases,
593 * assuming that route entries for our own addresses are not made by
594 * cloning (it should be true because in6_addloop explicitly installs
595 * the host route). However, we might have to do an explicit check
596 * while it would be less efficient. Or, should we rather install a
597 * reject route for such a case?
598 */
599 if (V_ip6_forward_rt.ro_rt &&
600 (V_ip6_forward_rt.ro_rt->rt_flags &
601 (RTF_HOST|RTF_GATEWAY)) == RTF_HOST &&
602#ifdef RTF_WASCLONED
603 !(V_ip6_forward_rt.ro_rt->rt_flags & RTF_WASCLONED) &&
604#endif
605#ifdef RTF_CLONED
606 !(V_ip6_forward_rt.ro_rt->rt_flags & RTF_CLONED) &&
607#endif
608#if 0
609 /*
610 * The check below is redundant since the comparison of
611 * the destination and the key of the rtentry has
612 * already done through looking up the routing table.
613 */
614 IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
615 &rt6_key(V_ip6_forward_rt.ro_rt)->sin6_addr)
616#endif
617 V_ip6_forward_rt.ro_rt->rt_ifp->if_type == IFT_LOOP) {
618 struct in6_ifaddr *ia6 =
619 (struct in6_ifaddr *)V_ip6_forward_rt.ro_rt->rt_ifa;
620
621 /*
622 * record address information into m_tag.
623 */
624 (void)ip6_setdstifaddr(m, ia6);
625
626 /*
627 * packets to a tentative, duplicated, or somehow invalid
628 * address must not be accepted.
629 */
630 if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) {
631 /* this address is ready */
632 ours = 1;
633 deliverifp = ia6->ia_ifp; /* correct? */
634 /* Count the packet in the ip address stats */
635 ia6->ia_ifa.if_ipackets++;
636 ia6->ia_ifa.if_ibytes += m->m_pkthdr.len;
637 goto hbhcheck;
638 } else {
639 char ip6bufs[INET6_ADDRSTRLEN];
640 char ip6bufd[INET6_ADDRSTRLEN];
641 /* address is not ready, so discard the packet. */
642 nd6log((LOG_INFO,
643 "ip6_input: packet to an unready address %s->%s\n",
644 ip6_sprintf(ip6bufs, &ip6->ip6_src),
645 ip6_sprintf(ip6bufd, &ip6->ip6_dst)));
646
647 goto bad;
648 }
649 }
650
651 /*
652 * FAITH (Firewall Aided Internet Translator)
653 */
654 if (V_ip6_keepfaith) {
655 if (V_ip6_forward_rt.ro_rt && V_ip6_forward_rt.ro_rt->rt_ifp
656 && V_ip6_forward_rt.ro_rt->rt_ifp->if_type == IFT_FAITH) {
657 /* XXX do we need more sanity checks? */
658 ours = 1;
659 deliverifp = V_ip6_forward_rt.ro_rt->rt_ifp; /* faith */
660 goto hbhcheck;
661 }
662 }
663
664 /*
665 * Now there is no reason to process the packet if it's not our own
666 * and we're not a router.
667 */
668 if (!V_ip6_forwarding) {
669 V_ip6stat.ip6s_cantforward++;
670 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
671 goto bad;
672 }
673
674 hbhcheck:
675 /*
676 * record address information into m_tag, if we don't have one yet.
677 * note that we are unable to record it, if the address is not listed
678 * as our interface address (e.g. multicast addresses, addresses
679 * within FAITH prefixes and such).
680 */
681 if (deliverifp && !ip6_getdstifaddr(m)) {
682 struct in6_ifaddr *ia6;
683
684 ia6 = in6_ifawithifp(deliverifp, &ip6->ip6_dst);
685 if (ia6) {
686 if (!ip6_setdstifaddr(m, ia6)) {
687 /*
688 * XXX maybe we should drop the packet here,
689 * as we could not provide enough information
690 * to the upper layers.
691 */
692 }
693 }
694 }
695
696 /*
697 * Process Hop-by-Hop options header if it's contained.
698 * m may be modified in ip6_hopopts_input().
699 * If a JumboPayload option is included, plen will also be modified.
700 */
701 plen = (u_int32_t)ntohs(ip6->ip6_plen);
702 if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
703 struct ip6_hbh *hbh;
704
705 if (ip6_hopopts_input(&plen, &rtalert, &m, &off)) {
706#if 0 /*touches NULL pointer*/
707 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
708#endif
709 return; /* m have already been freed */
710 }
711
712 /* adjust pointer */
713 ip6 = mtod(m, struct ip6_hdr *);
714
715 /*
716 * if the payload length field is 0 and the next header field
717 * indicates Hop-by-Hop Options header, then a Jumbo Payload
718 * option MUST be included.
719 */
720 if (ip6->ip6_plen == 0 && plen == 0) {
721 /*
722 * Note that if a valid jumbo payload option is
723 * contained, ip6_hopopts_input() must set a valid
724 * (non-zero) payload length to the variable plen.
725 */
726 V_ip6stat.ip6s_badoptions++;
727 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
728 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
729 icmp6_error(m, ICMP6_PARAM_PROB,
730 ICMP6_PARAMPROB_HEADER,
731 (caddr_t)&ip6->ip6_plen - (caddr_t)ip6);
732 return;
733 }
734#ifndef PULLDOWN_TEST
735 /* ip6_hopopts_input() ensures that mbuf is contiguous */
736 hbh = (struct ip6_hbh *)(ip6 + 1);
737#else
738 IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr),
739 sizeof(struct ip6_hbh));
740 if (hbh == NULL) {
741 V_ip6stat.ip6s_tooshort++;
742 return;
743 }
744#endif
745 nxt = hbh->ip6h_nxt;
746
747 /*
748 * If we are acting as a router and the packet contains a
749 * router alert option, see if we know the option value.
750 * Currently, we only support the option value for MLD, in which
751 * case we should pass the packet to the multicast routing
752 * daemon.
753 */
754 if (rtalert != ~0 && V_ip6_forwarding) {
755 switch (rtalert) {
756 case IP6OPT_RTALERT_MLD:
757 ours = 1;
758 break;
759 default:
760 /*
761 * RFC2711 requires unrecognized values must be
762 * silently ignored.
763 */
764 break;
765 }
766 }
767 } else
768 nxt = ip6->ip6_nxt;
769
770 /*
771 * Check that the amount of data in the buffers
772 * is as at least much as the IPv6 header would have us expect.
773 * Trim mbufs if longer than we expect.
774 * Drop packet if shorter than we expect.
775 */
776 if (m->m_pkthdr.len - sizeof(struct ip6_hdr) < plen) {
777 V_ip6stat.ip6s_tooshort++;
778 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
779 goto bad;
780 }
781 if (m->m_pkthdr.len > sizeof(struct ip6_hdr) + plen) {
782 if (m->m_len == m->m_pkthdr.len) {
783 m->m_len = sizeof(struct ip6_hdr) + plen;
784 m->m_pkthdr.len = sizeof(struct ip6_hdr) + plen;
785 } else
786 m_adj(m, sizeof(struct ip6_hdr) + plen - m->m_pkthdr.len);
787 }
788
789 /*
790 * Forward if desirable.
791 */
792 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
793 /*
794 * If we are acting as a multicast router, all
795 * incoming multicast packets are passed to the
796 * kernel-level multicast forwarding function.
797 * The packet is returned (relatively) intact; if
798 * ip6_mforward() returns a non-zero value, the packet
799 * must be discarded, else it may be accepted below.
800 */
801 if (ip6_mrouter && ip6_mforward &&
802 ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) {
803 V_ip6stat.ip6s_cantforward++;
804 m_freem(m);
805 return;
806 }
807 if (!ours) {
808 m_freem(m);
809 return;
810 }
811 } else if (!ours) {
812 ip6_forward(m, srcrt);
813 return;
814 }
815
816 ip6 = mtod(m, struct ip6_hdr *);
817
818 /*
819 * Malicious party may be able to use IPv4 mapped addr to confuse
820 * tcp/udp stack and bypass security checks (act as if it was from
821 * 127.0.0.1 by using IPv6 src ::ffff:127.0.0.1). Be cautious.
822 *
823 * For SIIT end node behavior, you may want to disable the check.
824 * However, you will become vulnerable to attacks using IPv4 mapped
825 * source.
826 */
827 if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
828 IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
829 V_ip6stat.ip6s_badscope++;
830 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
831 goto bad;
832 }
833
834 /*
835 * Tell launch routine the next header
836 */
837 V_ip6stat.ip6s_delivered++;
838 in6_ifstat_inc(deliverifp, ifs6_in_deliver);
839 nest = 0;
840
841 while (nxt != IPPROTO_DONE) {
842 if (V_ip6_hdrnestlimit && (++nest > V_ip6_hdrnestlimit)) {
843 V_ip6stat.ip6s_toomanyhdr++;
844 goto bad;
845 }
846
847 /*
848 * protection against faulty packet - there should be
849 * more sanity checks in header chain processing.
850 */
851 if (m->m_pkthdr.len < off) {
852 V_ip6stat.ip6s_tooshort++;
853 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
854 goto bad;
855 }
856
857#ifdef IPSEC
858 /*
859 * enforce IPsec policy checking if we are seeing last header.
860 * note that we do not visit this with protocols with pcb layer
861 * code - like udp/tcp/raw ip.
862 */
863 if (ip6_ipsec_input(m, nxt))
864 goto bad;
865#endif /* IPSEC */
866 nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt);
867 }
868 return;
869 bad:
870 m_freem(m);
871}
872
873/*
874 * set/grab in6_ifaddr correspond to IPv6 destination address.
875 * XXX backward compatibility wrapper
876 */
877static struct ip6aux *
878ip6_setdstifaddr(struct mbuf *m, struct in6_ifaddr *ia6)
879{
880 struct ip6aux *ip6a;
881
882 ip6a = ip6_addaux(m);
883 if (ip6a)
884 ip6a->ip6a_dstia6 = ia6;
885 return ip6a; /* NULL if failed to set */
886}
887
888struct in6_ifaddr *
889ip6_getdstifaddr(struct mbuf *m)
890{
891 struct ip6aux *ip6a;
892
893 ip6a = ip6_findaux(m);
894 if (ip6a)
895 return ip6a->ip6a_dstia6;
896 else
897 return NULL;
898}
899
900/*
901 * Hop-by-Hop options header processing. If a valid jumbo payload option is
902 * included, the real payload length will be stored in plenp.
903 *
904 * rtalertp - XXX: should be stored more smart way
905 */
906static int
907ip6_hopopts_input(u_int32_t *plenp, u_int32_t *rtalertp,
908 struct mbuf **mp, int *offp)
909{
910 INIT_VNET_INET6(curvnet);
911 struct mbuf *m = *mp;
912 int off = *offp, hbhlen;
913 struct ip6_hbh *hbh;
914 u_int8_t *opt;
915
916 /* validation of the length of the header */
917#ifndef PULLDOWN_TEST
918 IP6_EXTHDR_CHECK(m, off, sizeof(*hbh), -1);
919 hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off);
920 hbhlen = (hbh->ip6h_len + 1) << 3;
921
922 IP6_EXTHDR_CHECK(m, off, hbhlen, -1);
923 hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off);
924#else
925 IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m,
926 sizeof(struct ip6_hdr), sizeof(struct ip6_hbh));
927 if (hbh == NULL) {
928 V_ip6stat.ip6s_tooshort++;
929 return -1;
930 }
931 hbhlen = (hbh->ip6h_len + 1) << 3;
932 IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr),
933 hbhlen);
934 if (hbh == NULL) {
935 V_ip6stat.ip6s_tooshort++;
936 return -1;
937 }
938#endif
939 off += hbhlen;
940 hbhlen -= sizeof(struct ip6_hbh);
941 opt = (u_int8_t *)hbh + sizeof(struct ip6_hbh);
942
943 if (ip6_process_hopopts(m, (u_int8_t *)hbh + sizeof(struct ip6_hbh),
944 hbhlen, rtalertp, plenp) < 0)
945 return (-1);
946
947 *offp = off;
948 *mp = m;
949 return (0);
950}
951
952/*
953 * Search header for all Hop-by-hop options and process each option.
954 * This function is separate from ip6_hopopts_input() in order to
955 * handle a case where the sending node itself process its hop-by-hop
956 * options header. In such a case, the function is called from ip6_output().
957 *
958 * The function assumes that hbh header is located right after the IPv6 header
959 * (RFC2460 p7), opthead is pointer into data content in m, and opthead to
960 * opthead + hbhlen is located in continuous memory region.
961 */
962int
963ip6_process_hopopts(struct mbuf *m, u_int8_t *opthead, int hbhlen,
964 u_int32_t *rtalertp, u_int32_t *plenp)
965{
966 INIT_VNET_INET6(curvnet);
967 struct ip6_hdr *ip6;
968 int optlen = 0;
969 u_int8_t *opt = opthead;
970 u_int16_t rtalert_val;
971 u_int32_t jumboplen;
972 const int erroff = sizeof(struct ip6_hdr) + sizeof(struct ip6_hbh);
973
974 for (; hbhlen > 0; hbhlen -= optlen, opt += optlen) {
975 switch (*opt) {
976 case IP6OPT_PAD1:
977 optlen = 1;
978 break;
979 case IP6OPT_PADN:
980 if (hbhlen < IP6OPT_MINLEN) {
981 V_ip6stat.ip6s_toosmall++;
982 goto bad;
983 }
984 optlen = *(opt + 1) + 2;
985 break;
986 case IP6OPT_ROUTER_ALERT:
987 /* XXX may need check for alignment */
988 if (hbhlen < IP6OPT_RTALERT_LEN) {
989 V_ip6stat.ip6s_toosmall++;
990 goto bad;
991 }
992 if (*(opt + 1) != IP6OPT_RTALERT_LEN - 2) {
993 /* XXX stat */
994 icmp6_error(m, ICMP6_PARAM_PROB,
995 ICMP6_PARAMPROB_HEADER,
996 erroff + opt + 1 - opthead);
997 return (-1);
998 }
999 optlen = IP6OPT_RTALERT_LEN;
1000 bcopy((caddr_t)(opt + 2), (caddr_t)&rtalert_val, 2);
1001 *rtalertp = ntohs(rtalert_val);
1002 break;
1003 case IP6OPT_JUMBO:
1004 /* XXX may need check for alignment */
1005 if (hbhlen < IP6OPT_JUMBO_LEN) {
1006 V_ip6stat.ip6s_toosmall++;
1007 goto bad;
1008 }
1009 if (*(opt + 1) != IP6OPT_JUMBO_LEN - 2) {
1010 /* XXX stat */
1011 icmp6_error(m, ICMP6_PARAM_PROB,
1012 ICMP6_PARAMPROB_HEADER,
1013 erroff + opt + 1 - opthead);
1014 return (-1);
1015 }
1016 optlen = IP6OPT_JUMBO_LEN;
1017
1018 /*
1019 * IPv6 packets that have non 0 payload length
1020 * must not contain a jumbo payload option.
1021 */
1022 ip6 = mtod(m, struct ip6_hdr *);
1023 if (ip6->ip6_plen) {
1024 V_ip6stat.ip6s_badoptions++;
1025 icmp6_error(m, ICMP6_PARAM_PROB,
1026 ICMP6_PARAMPROB_HEADER,
1027 erroff + opt - opthead);
1028 return (-1);
1029 }
1030
1031 /*
1032 * We may see jumbolen in unaligned location, so
1033 * we'd need to perform bcopy().
1034 */
1035 bcopy(opt + 2, &jumboplen, sizeof(jumboplen));
1036 jumboplen = (u_int32_t)htonl(jumboplen);
1037
1038#if 1
1039 /*
1040 * if there are multiple jumbo payload options,
1041 * *plenp will be non-zero and the packet will be
1042 * rejected.
1043 * the behavior may need some debate in ipngwg -
1044 * multiple options does not make sense, however,
1045 * there's no explicit mention in specification.
1046 */
1047 if (*plenp != 0) {
1048 V_ip6stat.ip6s_badoptions++;
1049 icmp6_error(m, ICMP6_PARAM_PROB,
1050 ICMP6_PARAMPROB_HEADER,
1051 erroff + opt + 2 - opthead);
1052 return (-1);
1053 }
1054#endif
1055
1056 /*
1057 * jumbo payload length must be larger than 65535.
1058 */
1059 if (jumboplen <= IPV6_MAXPACKET) {
1060 V_ip6stat.ip6s_badoptions++;
1061 icmp6_error(m, ICMP6_PARAM_PROB,
1062 ICMP6_PARAMPROB_HEADER,
1063 erroff + opt + 2 - opthead);
1064 return (-1);
1065 }
1066 *plenp = jumboplen;
1067
1068 break;
1069 default: /* unknown option */
1070 if (hbhlen < IP6OPT_MINLEN) {
1071 V_ip6stat.ip6s_toosmall++;
1072 goto bad;
1073 }
1074 optlen = ip6_unknown_opt(opt, m,
1075 erroff + opt - opthead);
1076 if (optlen == -1)
1077 return (-1);
1078 optlen += 2;
1079 break;
1080 }
1081 }
1082
1083 return (0);
1084
1085 bad:
1086 m_freem(m);
1087 return (-1);
1088}
1089
1090/*
1091 * Unknown option processing.
1092 * The third argument `off' is the offset from the IPv6 header to the option,
1093 * which is necessary if the IPv6 header the and option header and IPv6 header
1094 * is not continuous in order to return an ICMPv6 error.
1095 */
1096int
1097ip6_unknown_opt(u_int8_t *optp, struct mbuf *m, int off)
1098{
1099 INIT_VNET_INET6(curvnet);
1100 struct ip6_hdr *ip6;
1101
1102 switch (IP6OPT_TYPE(*optp)) {
1103 case IP6OPT_TYPE_SKIP: /* ignore the option */
1104 return ((int)*(optp + 1));
1105 case IP6OPT_TYPE_DISCARD: /* silently discard */
1106 m_freem(m);
1107 return (-1);
1108 case IP6OPT_TYPE_FORCEICMP: /* send ICMP even if multicasted */
1109 V_ip6stat.ip6s_badoptions++;
1110 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off);
1111 return (-1);
1112 case IP6OPT_TYPE_ICMP: /* send ICMP if not multicasted */
1113 V_ip6stat.ip6s_badoptions++;
1114 ip6 = mtod(m, struct ip6_hdr *);
1115 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
1116 (m->m_flags & (M_BCAST|M_MCAST)))
1117 m_freem(m);
1118 else
1119 icmp6_error(m, ICMP6_PARAM_PROB,
1120 ICMP6_PARAMPROB_OPTION, off);
1121 return (-1);
1122 }
1123
1124 m_freem(m); /* XXX: NOTREACHED */
1125 return (-1);
1126}
1127
1128/*
1129 * Create the "control" list for this pcb.
1130 * These functions will not modify mbuf chain at all.
1131 *
1132 * With KAME mbuf chain restriction:
1133 * The routine will be called from upper layer handlers like tcp6_input().
1134 * Thus the routine assumes that the caller (tcp6_input) have already
1135 * called IP6_EXTHDR_CHECK() and all the extension headers are located in the
1136 * very first mbuf on the mbuf chain.
1137 *
1138 * ip6_savecontrol_v4 will handle those options that are possible to be
1139 * set on a v4-mapped socket.
1140 * ip6_savecontrol will directly call ip6_savecontrol_v4 to handle those
1141 * options and handle the v6-only ones itself.
1142 */
1143struct mbuf **
1144ip6_savecontrol_v4(struct inpcb *inp, struct mbuf *m, struct mbuf **mp,
1145 int *v4only)
1146{
1147 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1148
1149#ifdef SO_TIMESTAMP
1150 if ((inp->inp_socket->so_options & SO_TIMESTAMP) != 0) {
1151 struct timeval tv;
1152
1153 microtime(&tv);
1154 *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
1155 SCM_TIMESTAMP, SOL_SOCKET);
1156 if (*mp)
1157 mp = &(*mp)->m_next;
1158 }
1159#endif
1160
1161 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
1162 if (v4only != NULL)
1163 *v4only = 1;
1164 return (mp);
1165 }
1166
1167#define IS2292(inp, x, y) (((inp)->inp_flags & IN6P_RFC2292) ? (x) : (y))
1168 /* RFC 2292 sec. 5 */
1169 if ((inp->inp_flags & IN6P_PKTINFO) != 0) {
1170 struct in6_pktinfo pi6;
1171
1172 bcopy(&ip6->ip6_dst, &pi6.ipi6_addr, sizeof(struct in6_addr));
1173 in6_clearscope(&pi6.ipi6_addr); /* XXX */
1174 pi6.ipi6_ifindex =
1175 (m && m->m_pkthdr.rcvif) ? m->m_pkthdr.rcvif->if_index : 0;
1176
1177 *mp = sbcreatecontrol((caddr_t) &pi6,
1178 sizeof(struct in6_pktinfo),
1179 IS2292(inp, IPV6_2292PKTINFO, IPV6_PKTINFO), IPPROTO_IPV6);
1180 if (*mp)
1181 mp = &(*mp)->m_next;
1182 }
1183
1184 if ((inp->inp_flags & IN6P_HOPLIMIT) != 0) {
1185 int hlim = ip6->ip6_hlim & 0xff;
1186
1187 *mp = sbcreatecontrol((caddr_t) &hlim, sizeof(int),
1188 IS2292(inp, IPV6_2292HOPLIMIT, IPV6_HOPLIMIT),
1189 IPPROTO_IPV6);
1190 if (*mp)
1191 mp = &(*mp)->m_next;
1192 }
1193
1194 if (v4only != NULL)
1195 *v4only = 0;
1196 return (mp);
1197}
1198
1199void
1200ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
1201{
1202 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1203 int v4only = 0;
1204
1205 mp = ip6_savecontrol_v4(in6p, m, mp, &v4only);
1206 if (v4only)
1207 return;
1208
1209 if ((in6p->in6p_flags & IN6P_TCLASS) != 0) {
1210 u_int32_t flowinfo;
1211 int tclass;
1212
1213 flowinfo = (u_int32_t)ntohl(ip6->ip6_flow & IPV6_FLOWINFO_MASK);
1214 flowinfo >>= 20;
1215
1216 tclass = flowinfo & 0xff;
1217 *mp = sbcreatecontrol((caddr_t) &tclass, sizeof(tclass),
1218 IPV6_TCLASS, IPPROTO_IPV6);
1219 if (*mp)
1220 mp = &(*mp)->m_next;
1221 }
1222
1223 /*
1224 * IPV6_HOPOPTS socket option. Recall that we required super-user
1225 * privilege for the option (see ip6_ctloutput), but it might be too
1226 * strict, since there might be some hop-by-hop options which can be
1227 * returned to normal user.
1228 * See also RFC 2292 section 6 (or RFC 3542 section 8).
1229 */
1230 if ((in6p->in6p_flags & IN6P_HOPOPTS) != 0) {
1231 /*
1232 * Check if a hop-by-hop options header is contatined in the
1233 * received packet, and if so, store the options as ancillary
1234 * data. Note that a hop-by-hop options header must be
1235 * just after the IPv6 header, which is assured through the
1236 * IPv6 input processing.
1237 */
1238 if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
1239 struct ip6_hbh *hbh;
1240 int hbhlen = 0;
1241#ifdef PULLDOWN_TEST
1242 struct mbuf *ext;
1243#endif
1244
1245#ifndef PULLDOWN_TEST
1246 hbh = (struct ip6_hbh *)(ip6 + 1);
1247 hbhlen = (hbh->ip6h_len + 1) << 3;
1248#else
1249 ext = ip6_pullexthdr(m, sizeof(struct ip6_hdr),
1250 ip6->ip6_nxt);
1251 if (ext == NULL) {
1252 V_ip6stat.ip6s_tooshort++;
1253 return;
1254 }
1255 hbh = mtod(ext, struct ip6_hbh *);
1256 hbhlen = (hbh->ip6h_len + 1) << 3;
1257 if (hbhlen != ext->m_len) {
1258 m_freem(ext);
1259 V_ip6stat.ip6s_tooshort++;
1260 return;
1261 }
1262#endif
1263
1264 /*
1265 * XXX: We copy the whole header even if a
1266 * jumbo payload option is included, the option which
1267 * is to be removed before returning according to
1268 * RFC2292.
1269 * Note: this constraint is removed in RFC3542
1270 */
1271 *mp = sbcreatecontrol((caddr_t)hbh, hbhlen,
1272 IS2292(in6p, IPV6_2292HOPOPTS, IPV6_HOPOPTS),
1273 IPPROTO_IPV6);
1274 if (*mp)
1275 mp = &(*mp)->m_next;
1276#ifdef PULLDOWN_TEST
1277 m_freem(ext);
1278#endif
1279 }
1280 }
1281
1282 if ((in6p->in6p_flags & (IN6P_RTHDR | IN6P_DSTOPTS)) != 0) {
1283 int nxt = ip6->ip6_nxt, off = sizeof(struct ip6_hdr);
1284
1285 /*
1286 * Search for destination options headers or routing
1287 * header(s) through the header chain, and stores each
1288 * header as ancillary data.
1289 * Note that the order of the headers remains in
1290 * the chain of ancillary data.
1291 */
1292 while (1) { /* is explicit loop prevention necessary? */
1293 struct ip6_ext *ip6e = NULL;
1294 int elen;
1295#ifdef PULLDOWN_TEST
1296 struct mbuf *ext = NULL;
1297#endif
1298
1299 /*
1300 * if it is not an extension header, don't try to
1301 * pull it from the chain.
1302 */
1303 switch (nxt) {
1304 case IPPROTO_DSTOPTS:
1305 case IPPROTO_ROUTING:
1306 case IPPROTO_HOPOPTS:
1307 case IPPROTO_AH: /* is it possible? */
1308 break;
1309 default:
1310 goto loopend;
1311 }
1312
1313#ifndef PULLDOWN_TEST
1314 if (off + sizeof(*ip6e) > m->m_len)
1315 goto loopend;
1316 ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + off);
1317 if (nxt == IPPROTO_AH)
1318 elen = (ip6e->ip6e_len + 2) << 2;
1319 else
1320 elen = (ip6e->ip6e_len + 1) << 3;
1321 if (off + elen > m->m_len)
1322 goto loopend;
1323#else
1324 ext = ip6_pullexthdr(m, off, nxt);
1325 if (ext == NULL) {
1326 V_ip6stat.ip6s_tooshort++;
1327 return;
1328 }
1329 ip6e = mtod(ext, struct ip6_ext *);
1330 if (nxt == IPPROTO_AH)
1331 elen = (ip6e->ip6e_len + 2) << 2;
1332 else
1333 elen = (ip6e->ip6e_len + 1) << 3;
1334 if (elen != ext->m_len) {
1335 m_freem(ext);
1336 V_ip6stat.ip6s_tooshort++;
1337 return;
1338 }
1339#endif
1340
1341 switch (nxt) {
1342 case IPPROTO_DSTOPTS:
1343 if (!(in6p->in6p_flags & IN6P_DSTOPTS))
1344 break;
1345
1346 *mp = sbcreatecontrol((caddr_t)ip6e, elen,
1347 IS2292(in6p,
1348 IPV6_2292DSTOPTS, IPV6_DSTOPTS),
1349 IPPROTO_IPV6);
1350 if (*mp)
1351 mp = &(*mp)->m_next;
1352 break;
1353 case IPPROTO_ROUTING:
1354 if (!in6p->in6p_flags & IN6P_RTHDR)
1355 break;
1356
1357 *mp = sbcreatecontrol((caddr_t)ip6e, elen,
1358 IS2292(in6p, IPV6_2292RTHDR, IPV6_RTHDR),
1359 IPPROTO_IPV6);
1360 if (*mp)
1361 mp = &(*mp)->m_next;
1362 break;
1363 case IPPROTO_HOPOPTS:
1364 case IPPROTO_AH: /* is it possible? */
1365 break;
1366
1367 default:
1368 /*
1369 * other cases have been filtered in the above.
1370 * none will visit this case. here we supply
1371 * the code just in case (nxt overwritten or
1372 * other cases).
1373 */
1374#ifdef PULLDOWN_TEST
1375 m_freem(ext);
1376#endif
1377 goto loopend;
1378
1379 }
1380
1381 /* proceed with the next header. */
1382 off += elen;
1383 nxt = ip6e->ip6e_nxt;
1384 ip6e = NULL;
1385#ifdef PULLDOWN_TEST
1386 m_freem(ext);
1387 ext = NULL;
1388#endif
1389 }
1390 loopend:
1391 ;
1392 }
1393}
1394#undef IS2292
1395
1396void
1397ip6_notify_pmtu(struct inpcb *in6p, struct sockaddr_in6 *dst, u_int32_t *mtu)
1398{
1399 struct socket *so;
1400 struct mbuf *m_mtu;
1401 struct ip6_mtuinfo mtuctl;
1402
1403 so = in6p->inp_socket;
1404
1405 if (mtu == NULL)
1406 return;
1407
1408#ifdef DIAGNOSTIC
1409 if (so == NULL) /* I believe this is impossible */
1410 panic("ip6_notify_pmtu: socket is NULL");
1411#endif
1412
1413 bzero(&mtuctl, sizeof(mtuctl)); /* zero-clear for safety */
1414 mtuctl.ip6m_mtu = *mtu;
1415 mtuctl.ip6m_addr = *dst;
1416 if (sa6_recoverscope(&mtuctl.ip6m_addr))
1417 return;
1418
1419 if ((m_mtu = sbcreatecontrol((caddr_t)&mtuctl, sizeof(mtuctl),
1420 IPV6_PATHMTU, IPPROTO_IPV6)) == NULL)
1421 return;
1422
1423 if (sbappendaddr(&so->so_rcv, (struct sockaddr *)dst, NULL, m_mtu)
1424 == 0) {
1425 m_freem(m_mtu);
1426 /* XXX: should count statistics */
1427 } else
1428 sorwakeup(so);
1429
1430 return;
1431}
1432
1433#ifdef PULLDOWN_TEST
1434/*
1435 * pull single extension header from mbuf chain. returns single mbuf that
1436 * contains the result, or NULL on error.
1437 */
1438static struct mbuf *
1439ip6_pullexthdr(struct mbuf *m, size_t off, int nxt)
1440{
1441 struct ip6_ext ip6e;
1442 size_t elen;
1443 struct mbuf *n;
1444
1445#ifdef DIAGNOSTIC
1446 switch (nxt) {
1447 case IPPROTO_DSTOPTS:
1448 case IPPROTO_ROUTING:
1449 case IPPROTO_HOPOPTS:
1450 case IPPROTO_AH: /* is it possible? */
1451 break;
1452 default:
1453 printf("ip6_pullexthdr: invalid nxt=%d\n", nxt);
1454 }
1455#endif
1456
1457 m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
1458 if (nxt == IPPROTO_AH)
1459 elen = (ip6e.ip6e_len + 2) << 2;
1460 else
1461 elen = (ip6e.ip6e_len + 1) << 3;
1462
1463 MGET(n, M_DONTWAIT, MT_DATA);
1464 if (n && elen >= MLEN) {
1465 MCLGET(n, M_DONTWAIT);
1466 if ((n->m_flags & M_EXT) == 0) {
1467 m_free(n);
1468 n = NULL;
1469 }
1470 }
1471 if (!n)
1472 return NULL;
1473
1474 n->m_len = 0;
1475 if (elen >= M_TRAILINGSPACE(n)) {
1476 m_free(n);
1477 return NULL;
1478 }
1479
1480 m_copydata(m, off, elen, mtod(n, caddr_t));
1481 n->m_len = elen;
1482 return n;
1483}
1484#endif
1485
1486/*
1487 * Get pointer to the previous header followed by the header
1488 * currently processed.
1489 * XXX: This function supposes that
1490 * M includes all headers,
1491 * the next header field and the header length field of each header
1492 * are valid, and
1493 * the sum of each header length equals to OFF.
1494 * Because of these assumptions, this function must be called very
1495 * carefully. Moreover, it will not be used in the near future when
1496 * we develop `neater' mechanism to process extension headers.
1497 */
1498char *
1499ip6_get_prevhdr(struct mbuf *m, int off)
1500{
1501 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1502
1503 if (off == sizeof(struct ip6_hdr))
1504 return (&ip6->ip6_nxt);
1505 else {
1506 int len, nxt;
1507 struct ip6_ext *ip6e = NULL;
1508
1509 nxt = ip6->ip6_nxt;
1510 len = sizeof(struct ip6_hdr);
1511 while (len < off) {
1512 ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + len);
1513
1514 switch (nxt) {
1515 case IPPROTO_FRAGMENT:
1516 len += sizeof(struct ip6_frag);
1517 break;
1518 case IPPROTO_AH:
1519 len += (ip6e->ip6e_len + 2) << 2;
1520 break;
1521 default:
1522 len += (ip6e->ip6e_len + 1) << 3;
1523 break;
1524 }
1525 nxt = ip6e->ip6e_nxt;
1526 }
1527 if (ip6e)
1528 return (&ip6e->ip6e_nxt);
1529 else
1530 return NULL;
1531 }
1532}
1533
1534/*
1535 * get next header offset. m will be retained.
1536 */
1537int
1538ip6_nexthdr(struct mbuf *m, int off, int proto, int *nxtp)
1539{
1540 struct ip6_hdr ip6;
1541 struct ip6_ext ip6e;
1542 struct ip6_frag fh;
1543
1544 /* just in case */
1545 if (m == NULL)
1546 panic("ip6_nexthdr: m == NULL");
1547 if ((m->m_flags & M_PKTHDR) == 0 || m->m_pkthdr.len < off)
1548 return -1;
1549
1550 switch (proto) {
1551 case IPPROTO_IPV6:
1552 if (m->m_pkthdr.len < off + sizeof(ip6))
1553 return -1;
1554 m_copydata(m, off, sizeof(ip6), (caddr_t)&ip6);
1555 if (nxtp)
1556 *nxtp = ip6.ip6_nxt;
1557 off += sizeof(ip6);
1558 return off;
1559
1560 case IPPROTO_FRAGMENT:
1561 /*
1562 * terminate parsing if it is not the first fragment,
1563 * it does not make sense to parse through it.
1564 */
1565 if (m->m_pkthdr.len < off + sizeof(fh))
1566 return -1;
1567 m_copydata(m, off, sizeof(fh), (caddr_t)&fh);
1568 /* IP6F_OFF_MASK = 0xfff8(BigEndian), 0xf8ff(LittleEndian) */
1569 if (fh.ip6f_offlg & IP6F_OFF_MASK)
1570 return -1;
1571 if (nxtp)
1572 *nxtp = fh.ip6f_nxt;
1573 off += sizeof(struct ip6_frag);
1574 return off;
1575
1576 case IPPROTO_AH:
1577 if (m->m_pkthdr.len < off + sizeof(ip6e))
1578 return -1;
1579 m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
1580 if (nxtp)
1581 *nxtp = ip6e.ip6e_nxt;
1582 off += (ip6e.ip6e_len + 2) << 2;
1583 return off;
1584
1585 case IPPROTO_HOPOPTS:
1586 case IPPROTO_ROUTING:
1587 case IPPROTO_DSTOPTS:
1588 if (m->m_pkthdr.len < off + sizeof(ip6e))
1589 return -1;
1590 m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
1591 if (nxtp)
1592 *nxtp = ip6e.ip6e_nxt;
1593 off += (ip6e.ip6e_len + 1) << 3;
1594 return off;
1595
1596 case IPPROTO_NONE:
1597 case IPPROTO_ESP:
1598 case IPPROTO_IPCOMP:
1599 /* give up */
1600 return -1;
1601
1602 default:
1603 return -1;
1604 }
1605
1606 return -1;
1607}
1608
1609/*
1610 * get offset for the last header in the chain. m will be kept untainted.
1611 */
1612int
1613ip6_lasthdr(struct mbuf *m, int off, int proto, int *nxtp)
1614{
1615 int newoff;
1616 int nxt;
1617
1618 if (!nxtp) {
1619 nxt = -1;
1620 nxtp = &nxt;
1621 }
1622 while (1) {
1623 newoff = ip6_nexthdr(m, off, proto, nxtp);
1624 if (newoff < 0)
1625 return off;
1626 else if (newoff < off)
1627 return -1; /* invalid */
1628 else if (newoff == off)
1629 return newoff;
1630
1631 off = newoff;
1632 proto = *nxtp;
1633 }
1634}
1635
1636struct ip6aux *
1637ip6_addaux(struct mbuf *m)
1638{
1639 struct m_tag *mtag;
1640
1641 mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
1642 if (!mtag) {
1643 mtag = m_tag_get(PACKET_TAG_IPV6_INPUT, sizeof(struct ip6aux),
1644 M_NOWAIT);
1645 if (mtag) {
1646 m_tag_prepend(m, mtag);
1647 bzero(mtag + 1, sizeof(struct ip6aux));
1648 }
1649 }
1650 return mtag ? (struct ip6aux *)(mtag + 1) : NULL;
1651}
1652
1653struct ip6aux *
1654ip6_findaux(struct mbuf *m)
1655{
1656 struct m_tag *mtag;
1657
1658 mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
1659 return mtag ? (struct ip6aux *)(mtag + 1) : NULL;
1660}
1661
1662void
1663ip6_delaux(struct mbuf *m)
1664{
1665 struct m_tag *mtag;
1666
1667 mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
1668 if (mtag)
1669 m_tag_delete(m, mtag);
1670}
1671
1672/*
1673 * System control for IP6
1674 */
1675
1676u_char inet6ctlerrmap[PRC_NCMDS] = {
1677 0, 0, 0, 0,
1678 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH,
1679 EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED,
1680 EMSGSIZE, EHOSTUNREACH, 0, 0,
1681 0, 0, 0, 0,
1682 ENOPROTOOPT
1683};