/*
 * Copyright (c) 2007-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/* $apfw: pf_norm.c,v 1.10 2008/08/28 19:10:53 jhw Exp $ */
/* $OpenBSD: pf_norm.c,v 1.107 2006/04/16 00:59:52 pascoe Exp $ */

/*
 * Copyright 2001 Niels Provos <provos@citi.umich.edu>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/time.h>
#include <sys/random.h>
#include <sys/mcache.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/bpf.h>
#include <net/route.h>
#include <net/if_pflog.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_fsm.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>

#if INET6
#include <netinet/ip6.h>
#endif /* INET6 */

#include <net/pfvar.h>

struct pf_frent {
    LIST_ENTRY(pf_frent) fr_next;
    struct mbuf *fr_m;
#define fr_ip   fr_u.fru_ipv4
#define fr_ip6  fr_u.fru_ipv6
    union {
        struct ip *fru_ipv4;
        struct ip6_hdr *fru_ipv6;
    } fr_u;
    struct ip6_frag fr_ip6f_opt;
    int fr_ip6f_hlen;
};

struct pf_frcache {
    LIST_ENTRY(pf_frcache) fr_next;
    uint16_t fr_off;
    uint16_t fr_end;
};

#define PFFRAG_SEENLAST 0x0001  /* Seen the last fragment for this */
#define PFFRAG_NOBUFFER 0x0002  /* Non-buffering fragment cache */
#define PFFRAG_DROP     0x0004  /* Drop all fragments */
#define BUFFER_FRAGMENTS(fr)    (!((fr)->fr_flags & PFFRAG_NOBUFFER))

struct pf_fragment {
    RB_ENTRY(pf_fragment) fr_entry;
    TAILQ_ENTRY(pf_fragment) frag_next;
    struct pf_addr fr_srcx;
    struct pf_addr fr_dstx;
    u_int8_t fr_p;          /* protocol of this fragment */
    u_int8_t fr_flags;      /* status flags */
    u_int16_t fr_max;       /* fragment data max */
#define fr_id   fr_uid.fru_id4
#define fr_id6  fr_uid.fru_id6
    union {
        u_int16_t fru_id4;
        u_int32_t fru_id6;
    } fr_uid;
    int fr_af;
    u_int32_t fr_timeout;
#define fr_queue    fr_u.fru_queue
#define fr_cache    fr_u.fru_cache
    union {
        LIST_HEAD(pf_fragq, pf_frent) fru_queue;        /* buffering */
        LIST_HEAD(pf_cacheq, pf_frcache) fru_cache;     /* non-buf */
    } fr_u;
};

static TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue;
static TAILQ_HEAD(pf_cachequeue, pf_fragment) pf_cachequeue;

static __inline int pf_frag_compare(struct pf_fragment *,
    struct pf_fragment *);
static RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree;
RB_PROTOTYPE_SC(__private_extern__, pf_frag_tree, pf_fragment, fr_entry,
    pf_frag_compare);
RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);

/* Private prototypes */
static void pf_ip6hdr2key(struct pf_fragment *, struct ip6_hdr *,
    struct ip6_frag *);
static void pf_ip2key(struct pf_fragment *, struct ip *);
static void pf_remove_fragment(struct pf_fragment *);
static void pf_flush_fragments(void);
static void pf_free_fragment(struct pf_fragment *);
static struct pf_fragment *pf_find_fragment_by_key(struct pf_fragment *,
    struct pf_frag_tree *);
static __inline struct pf_fragment *
    pf_find_fragment_by_ipv4_header(struct ip *, struct pf_frag_tree *);
static __inline struct pf_fragment *
    pf_find_fragment_by_ipv6_header(struct ip6_hdr *, struct ip6_frag *,
    struct pf_frag_tree *);
static struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **,
    struct pf_frent *, int);
static struct mbuf *pf_fragcache(struct mbuf **, struct ip *,
    struct pf_fragment **, int, int, int *);
static struct mbuf *pf_reassemble6(struct
    mbuf **, struct pf_fragment **,
    struct pf_frent *, int);
static struct mbuf *pf_frag6cache(struct mbuf **, struct ip6_hdr*,
    struct ip6_frag *, struct pf_fragment **, int, int, int, int *);
static int pf_normalize_tcpopt(struct pf_rule *, int, struct pfi_kif *,
    struct pf_pdesc *, struct mbuf *, struct tcphdr *, int, int *);

#define DPFPRINTF(x) do {                               \
    if (pf_status.debug >= PF_DEBUG_MISC) {             \
        printf("%s: ", __func__);                       \
        printf x ;                                      \
    }                                                   \
} while (0)

/* Globals */
struct pool pf_frent_pl, pf_frag_pl;
static struct pool pf_cache_pl, pf_cent_pl;
struct pool pf_state_scrub_pl;

static int pf_nfrents, pf_ncache;

void
pf_normalize_init(void)
{
    pool_init(&pf_frent_pl, sizeof (struct pf_frent), 0, 0, 0, "pffrent",
        NULL);
    pool_init(&pf_frag_pl, sizeof (struct pf_fragment), 0, 0, 0, "pffrag",
        NULL);
    pool_init(&pf_cache_pl, sizeof (struct pf_fragment), 0, 0, 0,
        "pffrcache", NULL);
    pool_init(&pf_cent_pl, sizeof (struct pf_frcache), 0, 0, 0, "pffrcent",
        NULL);
    pool_init(&pf_state_scrub_pl, sizeof (struct pf_state_scrub), 0, 0, 0,
        "pfstscr", NULL);

    pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
    pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
    pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
    pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);

    TAILQ_INIT(&pf_fragqueue);
    TAILQ_INIT(&pf_cachequeue);
}

#if 0
void
pf_normalize_destroy(void)
{
    pool_destroy(&pf_state_scrub_pl);
    pool_destroy(&pf_cent_pl);
    pool_destroy(&pf_cache_pl);
    pool_destroy(&pf_frag_pl);
    pool_destroy(&pf_frent_pl);
}
#endif

int
pf_normalize_isempty(void)
{
    return (TAILQ_EMPTY(&pf_fragqueue) && TAILQ_EMPTY(&pf_cachequeue));
}

static __inline int
pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
{
    int diff;

    if ((diff = a->fr_af - b->fr_af))
        return (diff);
    else if ((diff = a->fr_p - b->fr_p))
        return (diff);
    else {
        struct pf_addr *sa = &a->fr_srcx;
        struct pf_addr *sb = &b->fr_srcx;
        struct pf_addr *da = &a->fr_dstx;
        struct pf_addr *db = &b->fr_dstx;

        switch (a->fr_af) {
#ifdef INET
        case AF_INET:
            if ((diff = a->fr_id - b->fr_id))
                return (diff);
            else if (sa->v4.s_addr < sb->v4.s_addr)
                return (-1);
            else if (sa->v4.s_addr > sb->v4.s_addr)
                return (1);
            else if (da->v4.s_addr < db->v4.s_addr)
                return (-1);
            else if (da->v4.s_addr > db->v4.s_addr)
                return (1);
            break;
#endif
#ifdef INET6
        case AF_INET6:
            if ((diff = a->fr_id6 - b->fr_id6))
                return (diff);
            else if (sa->addr32[3] < sb->addr32[3])
                return (-1);
            else if (sa->addr32[3] > sb->addr32[3])
                return (1);
            else if (sa->addr32[2] < sb->addr32[2])
                return (-1);
            else if (sa->addr32[2] > sb->addr32[2])
                return (1);
            else if (sa->addr32[1] < sb->addr32[1])
                return (-1);
            else if (sa->addr32[1] > sb->addr32[1])
                return (1);
            else if (sa->addr32[0] < sb->addr32[0])
                return (-1);
            else if (sa->addr32[0] > sb->addr32[0])
                return (1);
            else if (da->addr32[3] < db->addr32[3])
                return (-1);
            else if (da->addr32[3] > db->addr32[3])
                return (1);
            else if (da->addr32[2] < db->addr32[2])
                return (-1);
            else if (da->addr32[2] > db->addr32[2])
                return (1);
            else if (da->addr32[1] < db->addr32[1])
                return (-1);
            else if (da->addr32[1] > db->addr32[1])
                return (1);
            else if (da->addr32[0] < db->addr32[0])
                return (-1);
            else if (da->addr32[0] > db->addr32[0])
                return (1);
            break;
#endif
        default:
            VERIFY(0 && "only IPv4 and IPv6 supported!");
            break;
        }
    }
    return (0);
}

void
pf_purge_expired_fragments(void)
{
    struct pf_fragment *frag;
    u_int32_t expire = pf_time_second() -
        pf_default_rule.timeout[PFTM_FRAG];

    while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
        VERIFY(BUFFER_FRAGMENTS(frag));
        if (frag->fr_timeout > expire)
            break;

        switch (frag->fr_af) {
        case AF_INET:
            DPFPRINTF(("expiring IPv4 %d(0x%llx) from queue.\n",
                ntohs(frag->fr_id),
                (uint64_t)VM_KERNEL_ADDRPERM(frag)));
            break;
        case AF_INET6:
            DPFPRINTF(("expiring IPv6 %d(0x%llx) from queue.\n",
                ntohl(frag->fr_id6),
                (uint64_t)VM_KERNEL_ADDRPERM(frag)));
            break;
        default:
            VERIFY(0 && "only IPv4 and IPv6 supported");
            break;
        }
        pf_free_fragment(frag);
    }

    while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
        VERIFY(!BUFFER_FRAGMENTS(frag));
        if (frag->fr_timeout > expire)
            break;

        switch (frag->fr_af) {
        case AF_INET:
            DPFPRINTF(("expiring IPv4 %d(0x%llx) from cache.\n",
                ntohs(frag->fr_id),
                (uint64_t)VM_KERNEL_ADDRPERM(frag)));
            break;
        case AF_INET6:
            DPFPRINTF(("expiring IPv6 %d(0x%llx) from cache.\n",
                ntohl(frag->fr_id6),
                (uint64_t)VM_KERNEL_ADDRPERM(frag)));
            break;
        default:
            VERIFY(0 && "only IPv4 and IPv6 supported");
            break;
        }
        pf_free_fragment(frag);
        VERIFY(TAILQ_EMPTY(&pf_cachequeue) ||
            TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
    }
}

/*
 * Try to flush old fragments to make space for new ones
 */

static void
pf_flush_fragments(void)
{
    struct pf_fragment *frag;
    int goal;

    goal = pf_nfrents * 9 / 10;
    DPFPRINTF(("trying to free > %d frents\n",
        pf_nfrents - goal));
    while (goal < pf_nfrents) {
        frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
        if (frag == NULL)
            break;
        pf_free_fragment(frag);
    }


    goal = pf_ncache * 9 / 10;
    DPFPRINTF(("trying to free > %d cache entries\n",
        pf_ncache - goal));
    while (goal < pf_ncache) {
        frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
        if (frag == NULL)
            break;
        pf_free_fragment(frag);
    }
}

/* Frees the fragments and all associated entries */

static void
pf_free_fragment(struct pf_fragment *frag)
{
    struct pf_frent *frent;
    struct pf_frcache *frcache;

    /* Free all fragments */
    if (BUFFER_FRAGMENTS(frag)) {
        for (frent = LIST_FIRST(&frag->fr_queue); frent;
            frent = LIST_FIRST(&frag->fr_queue)) {
            LIST_REMOVE(frent, fr_next);

            m_freem(frent->fr_m);
            pool_put(&pf_frent_pl, frent);
            pf_nfrents--;
        }
    } else {
        for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
            frcache = LIST_FIRST(&frag->fr_cache)) {
            LIST_REMOVE(frcache, fr_next);

            VERIFY(LIST_EMPTY(&frag->fr_cache) ||
                LIST_FIRST(&frag->fr_cache)->fr_off >
                frcache->fr_end);

            pool_put(&pf_cent_pl, frcache);
            pf_ncache--;
        }
    }

    pf_remove_fragment(frag);
}

static void
pf_ip6hdr2key(struct pf_fragment *key, struct ip6_hdr *ip6,
    struct ip6_frag *fh)
{
    key->fr_p = fh->ip6f_nxt;
    key->fr_id6 = fh->ip6f_ident;
    key->fr_af = AF_INET6;
    key->fr_srcx.v6 = ip6->ip6_src;
    key->fr_dstx.v6 = ip6->ip6_dst;
}

static void
pf_ip2key(struct pf_fragment *key, struct ip *ip)
{
    key->fr_p = ip->ip_p;
    key->fr_id = ip->ip_id;
    key->fr_af = AF_INET;
    key->fr_srcx.v4.s_addr = ip->ip_src.s_addr;
    key->fr_dstx.v4.s_addr = ip->ip_dst.s_addr;
}

static struct pf_fragment *
pf_find_fragment_by_key(struct pf_fragment *key, struct pf_frag_tree *tree)
{
    struct pf_fragment *frag;

    frag = RB_FIND(pf_frag_tree, tree, key);
    if (frag != NULL) {
        /* XXX Are we sure we want to update the timeout? */
        frag->fr_timeout = pf_time_second();
        if (BUFFER_FRAGMENTS(frag)) {
            TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
            TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
        } else {
            TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
            TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
        }
    }

    return (frag);
}

static __inline struct pf_fragment *
pf_find_fragment_by_ipv4_header(struct ip *ip, struct pf_frag_tree *tree)
{
    struct pf_fragment key;
    pf_ip2key(&key, ip);
    return pf_find_fragment_by_key(&key, tree);
}

static __inline struct pf_fragment *
pf_find_fragment_by_ipv6_header(struct ip6_hdr *ip6, struct ip6_frag *fh,
    struct pf_frag_tree *tree)
{
    struct pf_fragment key;
    pf_ip6hdr2key(&key, ip6, fh);
    return pf_find_fragment_by_key(&key, tree);
}

/* Removes a fragment from the fragment queue and frees the fragment */

static void
pf_remove_fragment(struct pf_fragment *frag)
{
    if (BUFFER_FRAGMENTS(frag)) {
        RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
        TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
        pool_put(&pf_frag_pl, frag);
    } else {
        RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
        TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
        pool_put(&pf_cache_pl, frag);
    }
}

#define FR_IP_OFF(fr)   ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
static struct mbuf *
pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
    struct pf_frent *frent, int mff)
{
    struct mbuf *m = *m0, *m2;
    struct pf_frent *frea, *next;
    struct pf_frent *frep = NULL;
    struct ip *ip = frent->fr_ip;
    int hlen = ip->ip_hl << 2;
    u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
    u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
    u_int16_t fr_max = ip_len + off;

    VERIFY(*frag == NULL || BUFFER_FRAGMENTS(*frag));

    /* Strip off ip header */
    m->m_data += hlen;
    m->m_len -= hlen;

    /* Create a new reassembly queue for this packet */
    if (*frag == NULL) {
        *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
        if (*frag == NULL) {
            pf_flush_fragments();
            *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
            if (*frag == NULL)
                goto drop_fragment;
        }

        (*frag)->fr_flags = 0;
        (*frag)->fr_max = 0;
        (*frag)->fr_af = AF_INET;
        (*frag)->fr_srcx.v4 = frent->fr_ip->ip_src;
        (*frag)->fr_dstx.v4 = frent->fr_ip->ip_dst;
        (*frag)->fr_p = frent->fr_ip->ip_p;
        (*frag)->fr_id = frent->fr_ip->ip_id;
        (*frag)->fr_timeout = pf_time_second();
        LIST_INIT(&(*frag)->fr_queue);

        RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
        TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);

        /* We do not have a previous fragment */
        frep = NULL;
        goto insert;
    }

    /*
     * Find a fragment after the current one:
     *  - off contains the real shifted offset.
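     *    For IPv4 this comes from FR_IP_OFF(): the 13-bit ip_off field is
     *    masked with IP_OFFMASK and shifted left by 3, since fragment
     *    offsets are expressed in 8-byte units (e.g. an ip_off value of
     *    185 corresponds to a byte offset of 185 * 8 = 1480).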
     */
    LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
        if (FR_IP_OFF(frea) > off)
            break;
        frep = frea;
    }

    VERIFY(frep != NULL || frea != NULL);

    if (frep != NULL &&
        FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
        4 > off) {
        u_int16_t precut;

        precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
            frep->fr_ip->ip_hl * 4 - off;
        if (precut >= ip_len)
            goto drop_fragment;
        m_adj(frent->fr_m, precut);
        DPFPRINTF(("overlap -%d\n", precut));
        /* Enforce 8 byte boundaries */
        ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
        off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
        ip_len -= precut;
        ip->ip_len = htons(ip_len);
    }

    for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
        frea = next) {
        u_int16_t aftercut;

        aftercut = ip_len + off - FR_IP_OFF(frea);
        DPFPRINTF(("adjust overlap %d\n", aftercut));
        if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
            * 4) {
            frea->fr_ip->ip_len =
                htons(ntohs(frea->fr_ip->ip_len) - aftercut);
            frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
                (aftercut >> 3));
            m_adj(frea->fr_m, aftercut);
            break;
        }

        /* This fragment is completely overlapped, lose it */
        next = LIST_NEXT(frea, fr_next);
        m_freem(frea->fr_m);
        LIST_REMOVE(frea, fr_next);
        pool_put(&pf_frent_pl, frea);
        pf_nfrents--;
    }

insert:
    /* Update maximum data size */
    if ((*frag)->fr_max < fr_max)
        (*frag)->fr_max = fr_max;
    /* This is the last segment */
    if (!mff)
        (*frag)->fr_flags |= PFFRAG_SEENLAST;

    if (frep == NULL)
        LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
    else
        LIST_INSERT_AFTER(frep, frent, fr_next);

    /* Check if we are completely reassembled */
    if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
        return (NULL);

    /* Check if we have all the data */
    off = 0;
    for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
        next = LIST_NEXT(frep, fr_next);

        off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
        if (off < (*frag)->fr_max &&
            (next == NULL || FR_IP_OFF(next) != off)) {
            DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
                off, next == NULL ?
                -1 : FR_IP_OFF(next),
                (*frag)->fr_max));
            return (NULL);
        }
    }
    DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
    if (off < (*frag)->fr_max)
        return (NULL);

    /* We have all the data */
    frent = LIST_FIRST(&(*frag)->fr_queue);
    VERIFY(frent != NULL);
    if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
        DPFPRINTF(("drop: too big: %d\n", off));
        pf_free_fragment(*frag);
        *frag = NULL;
        return (NULL);
    }
    next = LIST_NEXT(frent, fr_next);

    /* Magic from ip_input */
    ip = frent->fr_ip;
    m = frent->fr_m;
    m2 = m->m_next;
    m->m_next = NULL;
    m_cat(m, m2);
    pool_put(&pf_frent_pl, frent);
    pf_nfrents--;
    for (frent = next; frent != NULL; frent = next) {
        next = LIST_NEXT(frent, fr_next);

        m2 = frent->fr_m;
        pool_put(&pf_frent_pl, frent);
        pf_nfrents--;
        m_cat(m, m2);
    }

    ip->ip_src = (*frag)->fr_srcx.v4;
    ip->ip_dst = (*frag)->fr_dstx.v4;

    /* Remove from fragment queue */
    pf_remove_fragment(*frag);
    *frag = NULL;

    hlen = ip->ip_hl << 2;
    ip->ip_len = htons(off + hlen);
    m->m_len += hlen;
    m->m_data -= hlen;

    /* some debugging cruft by sklower, below, will go away soon */
    /* XXX this should be done elsewhere */
    if (m->m_flags & M_PKTHDR) {
        int plen = 0;
        for (m2 = m; m2; m2 = m2->m_next)
            plen += m2->m_len;
        m->m_pkthdr.len = plen;
    }

    DPFPRINTF(("complete: 0x%llx(%d)\n",
        (uint64_t)VM_KERNEL_ADDRPERM(m), ntohs(ip->ip_len)));
    return (m);

drop_fragment:
    /* Oops - fail safe - drop packet */
    pool_put(&pf_frent_pl, frent);
    pf_nfrents--;
    m_freem(m);
    return (NULL);
}

static struct mbuf *
pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
    int drop, int *nomem)
{
    struct mbuf *m = *m0;
    struct pf_frcache *frp, *fra, *cur = NULL;
    int ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
    u_int16_t off = ntohs(h->ip_off) << 3;
    u_int16_t fr_max = ip_len + off;
    int hosed = 0;

    VERIFY(*frag == NULL || !BUFFER_FRAGMENTS(*frag));

    /* Create a new range queue for this packet */
    if (*frag == NULL) {
        *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
        if (*frag == NULL) {
            pf_flush_fragments();
            *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
            if (*frag == NULL)
                goto no_mem;
        }

        /* Get an entry for the queue */
        cur = pool_get(&pf_cent_pl, PR_NOWAIT);
        if (cur == NULL) {
            pool_put(&pf_cache_pl, *frag);
            *frag = NULL;
            goto no_mem;
        }
        pf_ncache++;

        (*frag)->fr_flags = PFFRAG_NOBUFFER;
        (*frag)->fr_max = 0;
        (*frag)->fr_af = AF_INET;
        (*frag)->fr_srcx.v4 = h->ip_src;
        (*frag)->fr_dstx.v4 = h->ip_dst;
        (*frag)->fr_p = h->ip_p;
        (*frag)->fr_id = h->ip_id;
        (*frag)->fr_timeout = pf_time_second();

        cur->fr_off = off;
        cur->fr_end = fr_max;
        LIST_INIT(&(*frag)->fr_cache);
        LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);

        RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
        TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);

        DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off,
            fr_max));

        goto pass;
    }

    /*
     * Find a fragment after the current one:
     *  - off contains the real shifted offset.
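     *    Unlike pf_reassemble(), this path keeps no mbufs around: each
     *    cache entry is just a [fr_off, fr_end) byte range describing data
     *    that has already been passed through.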
     */
    frp = NULL;
    LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
        if (fra->fr_off > off)
            break;
        frp = fra;
    }

    VERIFY(frp != NULL || fra != NULL);

    if (frp != NULL) {
        int precut;

        precut = frp->fr_end - off;
        if (precut >= ip_len) {
            /* Fragment is entirely a duplicate */
            DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
                h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
            goto drop_fragment;
        }
        if (precut == 0) {
            /* They are adjacent. Fixup cache entry */
            DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
                h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
            frp->fr_end = fr_max;
        } else if (precut > 0) {
            /*
             * The first part of this payload overlaps with a
             * fragment that has already been passed.
             * Need to trim off the first part of the payload.
             * But to do so easily, we need to create another
             * mbuf to throw the original header into.
             */

            DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
                h->ip_id, precut, frp->fr_off, frp->fr_end, off,
                fr_max));

            off += precut;
            fr_max -= precut;
            /* Update the previous frag to encompass this one */
            frp->fr_end = fr_max;

            if (!drop) {
                /*
                 * XXX Optimization opportunity
                 * This is a very heavy way to trim the payload.
                 * we could do it much faster by diddling mbuf
                 * internals but that would be even less legible
                 * than this mbuf magic. For my next trick,
                 * I'll pull a rabbit out of my laptop.
                 */
                *m0 = m_copym(m, 0, h->ip_hl << 2, M_NOWAIT);
                if (*m0 == NULL)
                    goto no_mem;
                VERIFY((*m0)->m_next == NULL);
                m_adj(m, precut + (h->ip_hl << 2));
                m_cat(*m0, m);
                m = *m0;
                if (m->m_flags & M_PKTHDR) {
                    int plen = 0;
                    struct mbuf *t;
                    for (t = m; t; t = t->m_next)
                        plen += t->m_len;
                    m->m_pkthdr.len = plen;
                }


                h = mtod(m, struct ip *);


                VERIFY((int)m->m_len ==
                    ntohs(h->ip_len) - precut);
                h->ip_off = htons(ntohs(h->ip_off) +
                    (precut >> 3));
                h->ip_len = htons(ntohs(h->ip_len) - precut);
            } else {
                hosed++;
            }
        } else {
            /* There is a gap between fragments */

            DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
                h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
                fr_max));

            cur = pool_get(&pf_cent_pl, PR_NOWAIT);
            if (cur == NULL)
                goto no_mem;
            pf_ncache++;

            cur->fr_off = off;
            cur->fr_end = fr_max;
            LIST_INSERT_AFTER(frp, cur, fr_next);
        }
    }

    if (fra != NULL) {
        int aftercut;
        int merge = 0;

        aftercut = fr_max - fra->fr_off;
        if (aftercut == 0) {
            /* Adjacent fragments */
            DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
                h->ip_id, off, fr_max, fra->fr_off, fra->fr_end));
            fra->fr_off = off;
            merge = 1;
        } else if (aftercut > 0) {
            /* Need to chop off the tail of this fragment */
            DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
                h->ip_id, aftercut, off, fr_max, fra->fr_off,
                fra->fr_end));
            fra->fr_off = off;
            fr_max -= aftercut;

            merge = 1;

            if (!drop) {
                m_adj(m, -aftercut);
                if (m->m_flags & M_PKTHDR) {
                    int plen = 0;
                    struct mbuf *t;
                    for (t = m; t; t = t->m_next)
                        plen += t->m_len;
                    m->m_pkthdr.len = plen;
                }
                h = mtod(m, struct ip *);
                VERIFY((int)m->m_len ==
                    ntohs(h->ip_len) - aftercut);
                h->ip_len = htons(ntohs(h->ip_len) - aftercut);
            } else {
                hosed++;
            }
        } else if (frp == NULL) {
            /* There is
               a gap between fragments */
            DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
                h->ip_id, -aftercut, off, fr_max, fra->fr_off,
                fra->fr_end));

            cur = pool_get(&pf_cent_pl, PR_NOWAIT);
            if (cur == NULL)
                goto no_mem;
            pf_ncache++;

            cur->fr_off = off;
            cur->fr_end = fr_max;
            LIST_INSERT_BEFORE(fra, cur, fr_next);
        }


        /* Need to glue together two separate fragment descriptors */
        if (merge) {
            if (cur && fra->fr_off <= cur->fr_end) {
                /* Need to merge in a previous 'cur' */
                DPFPRINTF(("fragcache[%d]: adjacent(merge "
                    "%d-%d) %d-%d (%d-%d)\n",
                    h->ip_id, cur->fr_off, cur->fr_end, off,
                    fr_max, fra->fr_off, fra->fr_end));
                fra->fr_off = cur->fr_off;
                LIST_REMOVE(cur, fr_next);
                pool_put(&pf_cent_pl, cur);
                pf_ncache--;
                cur = NULL;

            } else if (frp && fra->fr_off <= frp->fr_end) {
                /* Need to merge in a modified 'frp' */
                VERIFY(cur == NULL);
                DPFPRINTF(("fragcache[%d]: adjacent(merge "
                    "%d-%d) %d-%d (%d-%d)\n",
                    h->ip_id, frp->fr_off, frp->fr_end, off,
                    fr_max, fra->fr_off, fra->fr_end));
                fra->fr_off = frp->fr_off;
                LIST_REMOVE(frp, fr_next);
                pool_put(&pf_cent_pl, frp);
                pf_ncache--;
                frp = NULL;

            }
        }
    }

    if (hosed) {
        /*
         * We must keep tracking the overall fragment even when
         * we're going to drop it anyway so that we know when to
         * free the overall descriptor. Thus we drop the frag late.
         */
        goto drop_fragment;
    }


pass:
    /* Update maximum data size */
    if ((*frag)->fr_max < fr_max)
        (*frag)->fr_max = fr_max;

    /* This is the last segment */
    if (!mff)
        (*frag)->fr_flags |= PFFRAG_SEENLAST;

    /* Check if we are completely reassembled */
    if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
        LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
        LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
        /* Remove from fragment queue */
        DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
            (*frag)->fr_max));
        pf_free_fragment(*frag);
        *frag = NULL;
    }

    return (m);

no_mem:
    *nomem = 1;

    /* Still need to pay attention to !IP_MF */
    if (!mff && *frag != NULL)
        (*frag)->fr_flags |= PFFRAG_SEENLAST;

    m_freem(m);
    return (NULL);

drop_fragment:

    /* Still need to pay attention to !IP_MF */
    if (!mff && *frag != NULL)
        (*frag)->fr_flags |= PFFRAG_SEENLAST;

    if (drop) {
        /* This fragment has been deemed bad.
           Don't reass */
        if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
            DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
                h->ip_id));
        (*frag)->fr_flags |= PFFRAG_DROP;
    }

    m_freem(m);
    return (NULL);
}

#define FR_IP6_OFF(fr) \
    (ntohs((fr)->fr_ip6f_opt.ip6f_offlg & IP6F_OFF_MASK))
#define FR_IP6_PLEN(fr) (ntohs((fr)->fr_ip6->ip6_plen))
struct mbuf *
pf_reassemble6(struct mbuf **m0, struct pf_fragment **frag,
    struct pf_frent *frent, int mff)
{
    struct mbuf *m, *m2;
    struct pf_frent *frea, *frep, *next;
    struct ip6_hdr *ip6;
    int plen, off, fr_max;

    VERIFY(*frag == NULL || BUFFER_FRAGMENTS(*frag));
    m = *m0;
    frep = NULL;
    ip6 = frent->fr_ip6;
    off = FR_IP6_OFF(frent);
    plen = FR_IP6_PLEN(frent);
    fr_max = off + plen - (frent->fr_ip6f_hlen - sizeof *ip6);

    DPFPRINTF(("0x%llx IPv6 frag plen %u off %u fr_ip6f_hlen %u "
        "fr_max %u m_len %u\n", (uint64_t)VM_KERNEL_ADDRPERM(m), plen, off,
        frent->fr_ip6f_hlen, fr_max, m->m_len));

    /* strip off headers up to the fragment payload */
    m->m_data += frent->fr_ip6f_hlen;
    m->m_len -= frent->fr_ip6f_hlen;

    /* Create a new reassembly queue for this packet */
    if (*frag == NULL) {
        *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
        if (*frag == NULL) {
            pf_flush_fragments();
            *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
            if (*frag == NULL)
                goto drop_fragment;
        }

        (*frag)->fr_flags = 0;
        (*frag)->fr_max = 0;
        (*frag)->fr_af = AF_INET6;
        (*frag)->fr_srcx.v6 = frent->fr_ip6->ip6_src;
        (*frag)->fr_dstx.v6 = frent->fr_ip6->ip6_dst;
        (*frag)->fr_p = frent->fr_ip6f_opt.ip6f_nxt;
        (*frag)->fr_id6 = frent->fr_ip6f_opt.ip6f_ident;
        (*frag)->fr_timeout = pf_time_second();
        LIST_INIT(&(*frag)->fr_queue);

        RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
        TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);

        /* We do not have a previous fragment */
        frep = NULL;
        goto insert;
    }

    /*
     * Find a fragment after the current one:
     *  - off contains the real shifted offset.
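     *    For IPv6, FR_IP6_OFF() masks ip6f_offlg with IP6F_OFF_MASK; the
     *    13-bit fragment offset occupies the upper bits of that field, so
     *    the masked value is already the offset in bytes.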
     */
    LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
        if (FR_IP6_OFF(frea) > off)
            break;
        frep = frea;
    }

    VERIFY(frep != NULL || frea != NULL);

    if (frep != NULL &&
        FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) - frep->fr_ip6f_hlen > off)
    {
        u_int16_t precut;

        precut = FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) -
            frep->fr_ip6f_hlen - off;
        if (precut >= plen)
            goto drop_fragment;
        m_adj(frent->fr_m, precut);
        DPFPRINTF(("overlap -%d\n", precut));
        /* Enforce 8 byte boundaries */
        frent->fr_ip6f_opt.ip6f_offlg =
            htons(ntohs(frent->fr_ip6f_opt.ip6f_offlg) +
            (precut >> 3));
        off = FR_IP6_OFF(frent);
        plen -= precut;
        ip6->ip6_plen = htons(plen);
    }

    for (; frea != NULL && plen + off > FR_IP6_OFF(frea); frea = next) {
        u_int16_t aftercut;

        aftercut = plen + off - FR_IP6_OFF(frea);
        DPFPRINTF(("adjust overlap %d\n", aftercut));
        if (aftercut < FR_IP6_PLEN(frea) - frea->fr_ip6f_hlen) {
            frea->fr_ip6->ip6_plen = htons(FR_IP6_PLEN(frea) -
                aftercut);
            frea->fr_ip6f_opt.ip6f_offlg =
                htons(ntohs(frea->fr_ip6f_opt.ip6f_offlg) +
                (aftercut >> 3));
            m_adj(frea->fr_m, aftercut);
            break;
        }

        /* This fragment is completely overlapped, lose it */
        next = LIST_NEXT(frea, fr_next);
        m_freem(frea->fr_m);
        LIST_REMOVE(frea, fr_next);
        pool_put(&pf_frent_pl, frea);
        pf_nfrents--;
    }

insert:
    /* Update maximum data size */
    if ((*frag)->fr_max < fr_max)
        (*frag)->fr_max = fr_max;
    /* This is the last segment */
    if (!mff)
        (*frag)->fr_flags |= PFFRAG_SEENLAST;

    if (frep == NULL)
        LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
    else
        LIST_INSERT_AFTER(frep, frent, fr_next);

    /* Check if we are completely reassembled */
    if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
        return (NULL);

    /* Check if we have all the data */
    off = 0;
    for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
        next = LIST_NEXT(frep, fr_next);
        off += FR_IP6_PLEN(frep) - (frent->fr_ip6f_hlen - sizeof *ip6);
        DPFPRINTF(("frep at %d, next %d, max %d\n",
            off, next == NULL ? -1 : FR_IP6_OFF(next),
            (*frag)->fr_max));
        if (off < (*frag)->fr_max &&
            (next == NULL || FR_IP6_OFF(next) != off)) {
            DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
                off, next == NULL ?
                -1 : FR_IP6_OFF(next),
                (*frag)->fr_max));
            return (NULL);
        }
    }
    DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
    if (off < (*frag)->fr_max)
        return (NULL);

    /* We have all the data */
    frent = LIST_FIRST(&(*frag)->fr_queue);
    VERIFY(frent != NULL);
    if (frent->fr_ip6f_hlen + off > IP_MAXPACKET) {
        DPFPRINTF(("drop: too big: %d\n", off));
        pf_free_fragment(*frag);
        *frag = NULL;
        return (NULL);
    }

    ip6 = frent->fr_ip6;
    ip6->ip6_nxt = (*frag)->fr_p;
    ip6->ip6_plen = htons(off);
    ip6->ip6_src = (*frag)->fr_srcx.v6;
    ip6->ip6_dst = (*frag)->fr_dstx.v6;

    /* Remove from fragment queue */
    pf_remove_fragment(*frag);
    *frag = NULL;

    m = frent->fr_m;
    m->m_len += sizeof(struct ip6_hdr);
    m->m_data -= sizeof(struct ip6_hdr);
    memmove(m->m_data, ip6, sizeof(struct ip6_hdr));

    next = LIST_NEXT(frent, fr_next);
    pool_put(&pf_frent_pl, frent);
    pf_nfrents--;
    for (frent = next; next != NULL; frent = next) {
        m2 = frent->fr_m;

        m_cat(m, m2);
        next = LIST_NEXT(frent, fr_next);
        pool_put(&pf_frent_pl, frent);
        pf_nfrents--;
    }

    /* XXX this should be done elsewhere */
    if (m->m_flags & M_PKTHDR) {
        int pktlen = 0;
        for (m2 = m; m2; m2 = m2->m_next)
            pktlen += m2->m_len;
        m->m_pkthdr.len = pktlen;
    }

    DPFPRINTF(("complete: 0x%llx ip6_plen %d m_pkthdr.len %d\n",
        (uint64_t)VM_KERNEL_ADDRPERM(m), ntohs(ip6->ip6_plen),
        m->m_pkthdr.len));

    return m;

drop_fragment:
    /* Oops - fail safe - drop packet */
    pool_put(&pf_frent_pl, frent);
    --pf_nfrents;
    m_freem(m);
    return NULL;
}

static struct mbuf *
pf_frag6cache(struct mbuf **m0, struct ip6_hdr *h, struct ip6_frag *fh,
    struct pf_fragment **frag, int hlen, int mff, int drop, int *nomem)
{
    struct mbuf *m = *m0;
    u_int16_t plen, off, fr_max;
    struct pf_frcache *frp, *fra, *cur = NULL;
    int hosed = 0;

    VERIFY(*frag == NULL || !BUFFER_FRAGMENTS(*frag));
    m = *m0;
    off = ntohs(fh->ip6f_offlg & IP6F_OFF_MASK);
    plen = ntohs(h->ip6_plen) - (hlen - sizeof *h);

    /*
     * Apple Modification: dimambro@apple.com. The hlen being passed
     * into this function includes all the headers associated with
     * the packet, and may include routing headers, so to get to
     * the data payload as stored in the original IPv6 header we need
     * to subtract all those headers and the IP header.
     *
     * The 'max' local variable should also contain the offset from the
     * start of the reassembled packet to the octet just past the end of
     * the octets in the current fragment where:
     *  - 'off' is the offset from the start of the reassembled packet to
     *    the first octet in the fragment,
     *  - 'plen' is the length of the "payload data length", excluding all
     *    the IPv6 headers of the fragment.
     *  - 'hlen' is computed in pf_normalize_ip6() as the offset from the
     *    start of the IPv6 packet to the beginning of the data.
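     *
     * As a worked example with hypothetical numbers: for a second fragment
     * carrying ip6_plen = 1456 with hlen = 48 (a 40-byte IPv6 header plus
     * an 8-byte fragment header) and a fragment offset of 1448 bytes,
     * plen is 1456 - (48 - 40) = 1448 and fr_max is 1448 + 1448 = 2896.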
     */
    fr_max = off + plen;

    DPFPRINTF(("0x%llx plen %u off %u fr_max %u\n",
        (uint64_t)VM_KERNEL_ADDRPERM(m), plen, off, fr_max));

    /* Create a new range queue for this packet */
    if (*frag == NULL) {
        *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
        if (*frag == NULL) {
            pf_flush_fragments();
            *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
            if (*frag == NULL)
                goto no_mem;
        }

        /* Get an entry for the queue */
        cur = pool_get(&pf_cent_pl, PR_NOWAIT);
        if (cur == NULL) {
            pool_put(&pf_cache_pl, *frag);
            *frag = NULL;
            goto no_mem;
        }
        pf_ncache++;

        (*frag)->fr_flags = PFFRAG_NOBUFFER;
        (*frag)->fr_max = 0;
        (*frag)->fr_af = AF_INET6;
        (*frag)->fr_srcx.v6 = h->ip6_src;
        (*frag)->fr_dstx.v6 = h->ip6_dst;
        (*frag)->fr_p = fh->ip6f_nxt;
        (*frag)->fr_id6 = fh->ip6f_ident;
        (*frag)->fr_timeout = pf_time_second();

        cur->fr_off = off;
        cur->fr_end = fr_max;
        LIST_INIT(&(*frag)->fr_cache);
        LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);

        RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
        TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);

        DPFPRINTF(("frag6cache[%d]: new %d-%d\n", ntohl(fh->ip6f_ident),
            off, fr_max));

        goto pass;
    }

    /*
     * Find a fragment after the current one:
     *  - off contains the real shifted offset.
     */
    frp = NULL;
    LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
        if (fra->fr_off > off)
            break;
        frp = fra;
    }

    VERIFY(frp != NULL || fra != NULL);

    if (frp != NULL) {
        int precut;

        precut = frp->fr_end - off;
        if (precut >= plen) {
            /* Fragment is entirely a duplicate */
            DPFPRINTF(("frag6cache[%u]: dead (%d-%d) %d-%d\n",
                ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
                off, fr_max));
            goto drop_fragment;
        }
        if (precut == 0) {
            /* They are adjacent. Fixup cache entry */
            DPFPRINTF(("frag6cache[%u]: adjacent (%d-%d) %d-%d\n",
                ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
                off, fr_max));
            frp->fr_end = fr_max;
        } else if (precut > 0) {
            /* The first part of this payload overlaps with a
             * fragment that has already been passed.
             * Need to trim off the first part of the payload.
             * But to do so easily, we need to create another
             * mbuf to throw the original header into.
             */

            DPFPRINTF(("frag6cache[%u]: chop %d (%d-%d) %d-%d\n",
                ntohl(fh->ip6f_ident), precut, frp->fr_off,
                frp->fr_end, off, fr_max));

            off += precut;
            fr_max -= precut;
            /* Update the previous frag to encompass this one */
            frp->fr_end = fr_max;

            if (!drop) {
                /* XXX Optimization opportunity
                 * This is a very heavy way to trim the payload.
                 * we could do it much faster by diddling mbuf
                 * internals but that would be even less legible
                 * than this mbuf magic. For my next trick,
                 * I'll pull a rabbit out of my laptop.
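                 *
                 * (What follows copies the leading headers with
                 * m_copym(), trims the overlapping bytes plus those
                 * headers off the original chain with m_adj(), and then
                 * reattaches the trimmed payload with m_cat().)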
                 */
                *m0 = m_copym(m, 0, hlen, M_NOWAIT);
                if (*m0 == NULL)
                    goto no_mem;
                VERIFY((*m0)->m_next == NULL);
                m_adj(m, precut + hlen);
                m_cat(*m0, m);
                m = *m0;
                if (m->m_flags & M_PKTHDR) {
                    int pktlen = 0;
                    struct mbuf *t;
                    for (t = m; t; t = t->m_next)
                        pktlen += t->m_len;
                    m->m_pkthdr.len = pktlen;
                }

                h = mtod(m, struct ip6_hdr *);

                VERIFY((int)m->m_len ==
                    ntohs(h->ip6_plen) - precut);
                fh->ip6f_offlg &= ~IP6F_OFF_MASK;
                fh->ip6f_offlg |=
                    htons(ntohs(fh->ip6f_offlg & IP6F_OFF_MASK)
                    + (precut >> 3));
                h->ip6_plen = htons(ntohs(h->ip6_plen) -
                    precut);
            } else {
                hosed++;
            }
        } else {
            /* There is a gap between fragments */

            DPFPRINTF(("frag6cache[%u]: gap %d (%d-%d) %d-%d\n",
                ntohl(fh->ip6f_ident), -precut, frp->fr_off,
                frp->fr_end, off, fr_max));

            cur = pool_get(&pf_cent_pl, PR_NOWAIT);
            if (cur == NULL)
                goto no_mem;
            pf_ncache++;

            cur->fr_off = off;
            cur->fr_end = fr_max;
            LIST_INSERT_AFTER(frp, cur, fr_next);
        }
    }

    if (fra != NULL) {
        int aftercut;
        int merge = 0;

        aftercut = fr_max - fra->fr_off;
        if (aftercut == 0) {
            /* Adjacent fragments */
            DPFPRINTF(("frag6cache[%u]: adjacent %d-%d (%d-%d)\n",
                ntohl(fh->ip6f_ident), off, fr_max, fra->fr_off,
                fra->fr_end));
            fra->fr_off = off;
            merge = 1;
        } else if (aftercut > 0) {
            /* Need to chop off the tail of this fragment */
            DPFPRINTF(("frag6cache[%u]: chop %d %d-%d (%d-%d)\n",
                ntohl(fh->ip6f_ident), aftercut, off, fr_max,
                fra->fr_off, fra->fr_end));
            fra->fr_off = off;
            fr_max -= aftercut;

            merge = 1;

            if (!drop) {
                m_adj(m, -aftercut);
                if (m->m_flags & M_PKTHDR) {
                    int pktlen = 0;
                    struct mbuf *t;
                    for (t = m; t; t = t->m_next)
                        pktlen += t->m_len;
                    m->m_pkthdr.len = pktlen;
                }
                h = mtod(m, struct ip6_hdr *);
                VERIFY((int)m->m_len ==
                    ntohs(h->ip6_plen) - aftercut);
                h->ip6_plen =
                    htons(ntohs(h->ip6_plen) - aftercut);
            } else {
                hosed++;
            }
        } else if (frp == NULL) {
            /* There is a gap between fragments */
            DPFPRINTF(("frag6cache[%u]: gap %d %d-%d (%d-%d)\n",
                ntohl(fh->ip6f_ident), -aftercut, off, fr_max,
                fra->fr_off, fra->fr_end));

            cur = pool_get(&pf_cent_pl, PR_NOWAIT);
            if (cur == NULL)
                goto no_mem;
            pf_ncache++;

            cur->fr_off = off;
            cur->fr_end = fr_max;
            LIST_INSERT_BEFORE(fra, cur, fr_next);
        }

        /* Need to glue together two separate fragment descriptors */
        if (merge) {
            if (cur && fra->fr_off <= cur->fr_end) {
                /* Need to merge in a previous 'cur' */
                DPFPRINTF(("frag6cache[%u]: adjacent(merge "
                    "%d-%d) %d-%d (%d-%d)\n",
                    ntohl(fh->ip6f_ident), cur->fr_off,
                    cur->fr_end, off, fr_max, fra->fr_off,
                    fra->fr_end));
                fra->fr_off = cur->fr_off;
                LIST_REMOVE(cur, fr_next);
                pool_put(&pf_cent_pl, cur);
                pf_ncache--;
                cur = NULL;
            } else if (frp && fra->fr_off <= frp->fr_end) {
                /* Need to merge in a modified 'frp' */
                VERIFY(cur == NULL);
                DPFPRINTF(("frag6cache[%u]: adjacent(merge "
                    "%d-%d) %d-%d (%d-%d)\n",
                    ntohl(fh->ip6f_ident), frp->fr_off,
                    frp->fr_end, off, fr_max, fra->fr_off,
                    fra->fr_end));
                fra->fr_off = frp->fr_off;
                LIST_REMOVE(frp, fr_next);
                pool_put(&pf_cent_pl, frp);
                pf_ncache--;
                frp = NULL;
            }
        }
    }

    if (hosed) {
        /*
         * We must keep tracking the overall fragment even when
         * we're going to drop it anyway so that we know when to
         * free the overall descriptor. Thus we drop the frag late.
         */
        goto drop_fragment;
    }

pass:
    /* Update maximum data size */
    if ((*frag)->fr_max < fr_max)
        (*frag)->fr_max = fr_max;

    /* This is the last segment */
    if (!mff)
        (*frag)->fr_flags |= PFFRAG_SEENLAST;

    /* Check if we are completely reassembled */
    if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
        LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
        LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
        /* Remove from fragment queue */
        DPFPRINTF(("frag6cache[%u]: done 0-%d\n",
            ntohl(fh->ip6f_ident), (*frag)->fr_max));
        pf_free_fragment(*frag);
        *frag = NULL;
    }

    return (m);

no_mem:
    *nomem = 1;

    /* Still need to pay attention to !IP_MF */
    if (!mff && *frag != NULL)
        (*frag)->fr_flags |= PFFRAG_SEENLAST;

    m_freem(m);
    return (NULL);

drop_fragment:

    /* Still need to pay attention to !IP_MF */
    if (!mff && *frag != NULL)
        (*frag)->fr_flags |= PFFRAG_SEENLAST;

    if (drop) {
        /* This fragment has been deemed bad. Don't reass */
        if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
            DPFPRINTF(("frag6cache[%u]: dropping overall fragment\n",
                ntohl(fh->ip6f_ident)));
        (*frag)->fr_flags |= PFFRAG_DROP;
    }

    m_freem(m);
    return (NULL);
}

int
pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
    struct pf_pdesc *pd)
{
    struct mbuf *m = *m0;
    struct pf_rule *r;
    struct pf_frent *frent;
    struct pf_fragment *frag = NULL;
    struct ip *h = mtod(m, struct ip *);
    int mff = (ntohs(h->ip_off) & IP_MF);
    int hlen = h->ip_hl << 2;
    u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
    u_int16_t fr_max;
    int ip_len;
    int ip_off;
    int asd = 0;
    struct pf_ruleset *ruleset = NULL;

    r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
    while (r != NULL) {
        r->evaluations++;
        if (pfi_kif_match(r->kif, kif) == r->ifnot)
            r = r->skip[PF_SKIP_IFP].ptr;
        else if (r->direction && r->direction != dir)
            r = r->skip[PF_SKIP_DIR].ptr;
        else if (r->af && r->af != AF_INET)
            r = r->skip[PF_SKIP_AF].ptr;
        else if (r->proto && r->proto != h->ip_p)
            r = r->skip[PF_SKIP_PROTO].ptr;
        else if (PF_MISMATCHAW(&r->src.addr,
            (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
            r->src.neg, kif))
            r = r->skip[PF_SKIP_SRC_ADDR].ptr;
        else if (PF_MISMATCHAW(&r->dst.addr,
            (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
            r->dst.neg, NULL))
            r = r->skip[PF_SKIP_DST_ADDR].ptr;
        else {
            if (r->anchor == NULL)
                break;
            else
                pf_step_into_anchor(&asd, &ruleset,
                    PF_RULESET_SCRUB, &r, NULL, NULL);
        }
        if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
            PF_RULESET_SCRUB, &r, NULL, NULL))
            break;
    }

    if (r == NULL || r->action == PF_NOSCRUB)
        return (PF_PASS);
    else {
        r->packets[dir == PF_OUT]++;
        r->bytes[dir == PF_OUT] += pd->tot_len;
    }

    /* Check for illegal packets */
    if (hlen < (int)sizeof (struct ip))
        goto drop;

    if (hlen > ntohs(h->ip_len))
        goto drop;

    /* Clear IP_DF if the
       rule uses the no-df option */
    if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
        u_int16_t ipoff = h->ip_off;

        h->ip_off &= htons(~IP_DF);
        h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, 0);
    }

    /* We will need other tests here */
    if (!fragoff && !mff)
        goto no_fragment;

    /*
     * We're dealing with a fragment now. Don't allow fragments
     * with IP_DF to enter the cache. If the flag was cleared by
     * no-df above, fine. Otherwise drop it.
     */
    if (h->ip_off & htons(IP_DF)) {
        DPFPRINTF(("IP_DF\n"));
        goto bad;
    }

    ip_len = ntohs(h->ip_len) - hlen;
    ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;

    /* All fragments are 8 byte aligned */
    if (mff && (ip_len & 0x7)) {
        DPFPRINTF(("mff and %d\n", ip_len));
        goto bad;
    }

    /* Respect maximum length */
    if (fragoff + ip_len > IP_MAXPACKET) {
        DPFPRINTF(("max packet %d\n", fragoff + ip_len));
        goto bad;
    }
    fr_max = fragoff + ip_len;

    if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
        /* Fully buffer all of the fragments */

        frag = pf_find_fragment_by_ipv4_header(h, &pf_frag_tree);
        /* Check if we saw the last fragment already */
        if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
            fr_max > frag->fr_max)
            goto bad;

        /* Get an entry for the fragment queue */
        frent = pool_get(&pf_frent_pl, PR_NOWAIT);
        if (frent == NULL) {
            REASON_SET(reason, PFRES_MEMORY);
            return (PF_DROP);
        }
        pf_nfrents++;
        frent->fr_ip = h;
        frent->fr_m = m;

        /* Might return a completely reassembled mbuf, or NULL */
        DPFPRINTF(("reass IPv4 frag %d @ %d-%d\n", ntohs(h->ip_id),
            fragoff, fr_max));
        *m0 = m = pf_reassemble(m0, &frag, frent, mff);

        if (m == NULL)
            return (PF_DROP);

        VERIFY(m->m_flags & M_PKTHDR);

        /* use mtag from concatenated mbuf chain */
        pd->pf_mtag = pf_find_mtag(m);
#if DIAGNOSTIC
        if (pd->pf_mtag == NULL) {
            printf("%s: pf_find_mtag returned NULL(1)\n", __func__);
            if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
                m_freem(m);
                *m0 = NULL;
                goto no_mem;
            }
        }
#endif
        if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
            goto drop;

        h = mtod(m, struct ip *);
    } else {
        /* non-buffering fragment cache (drops or masks overlaps) */
        int nomem = 0;

        if (dir == PF_OUT && (pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
            /*
             * Already passed the fragment cache in the
             * input direction. If we continued, it would
             * appear to be a dup and would be dropped.
             */
            goto fragment_pass;
        }

        frag = pf_find_fragment_by_ipv4_header(h, &pf_cache_tree);

        /* Check if we saw the last fragment already */
        if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
            fr_max > frag->fr_max) {
            if (r->rule_flag & PFRULE_FRAGDROP)
                frag->fr_flags |= PFFRAG_DROP;
            goto bad;
        }

        *m0 = m = pf_fragcache(m0, h, &frag, mff,
            (r->rule_flag & PFRULE_FRAGDROP) ?
            1 : 0, &nomem);
        if (m == NULL) {
            if (nomem)
                goto no_mem;
            goto drop;
        }

        VERIFY(m->m_flags & M_PKTHDR);

        /* use mtag from copied and trimmed mbuf chain */
        pd->pf_mtag = pf_find_mtag(m);
#if DIAGNOSTIC
        if (pd->pf_mtag == NULL) {
            printf("%s: pf_find_mtag returned NULL(2)\n", __func__);
            if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
                m_freem(m);
                *m0 = NULL;
                goto no_mem;
            }
        }
#endif
        if (dir == PF_IN)
            pd->pf_mtag->pftag_flags |= PF_TAG_FRAGCACHE;

        if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
            goto drop;
        goto fragment_pass;
    }

no_fragment:
    /* At this point, only IP_DF is allowed in ip_off */
    if (h->ip_off & ~htons(IP_DF)) {
        u_int16_t ipoff = h->ip_off;

        h->ip_off &= htons(IP_DF);
        h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, 0);
    }

    /* Enforce a minimum ttl, may cause endless packet loops */
    if (r->min_ttl && h->ip_ttl < r->min_ttl) {
        u_int16_t ip_ttl = h->ip_ttl;

        h->ip_ttl = r->min_ttl;
        h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
    }
    if (r->rule_flag & PFRULE_RANDOMID) {
        u_int16_t oip_id = h->ip_id;

        h->ip_id = ip_randomid();
        h->ip_sum = pf_cksum_fixup(h->ip_sum, oip_id, h->ip_id, 0);
    }
    if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
        pd->flags |= PFDESC_IP_REAS;

    return (PF_PASS);

fragment_pass:
    /* Enforce a minimum ttl, may cause endless packet loops */
    if (r->min_ttl && h->ip_ttl < r->min_ttl) {
        u_int16_t ip_ttl = h->ip_ttl;

        h->ip_ttl = r->min_ttl;
        h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
    }
    if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
        pd->flags |= PFDESC_IP_REAS;
    return (PF_PASS);

no_mem:
    REASON_SET(reason, PFRES_MEMORY);
    if (r != NULL && r->log)
        PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r,
            NULL, NULL, pd);
    return (PF_DROP);

drop:
    REASON_SET(reason, PFRES_NORM);
    if (r != NULL && r->log)
        PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r,
            NULL, NULL, pd);
    return (PF_DROP);

bad:
    DPFPRINTF(("dropping bad IPv4 fragment\n"));

    /* Free associated fragments */
    if (frag != NULL)
        pf_free_fragment(frag);

    REASON_SET(reason, PFRES_FRAG);
    if (r != NULL && r->log)
        PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);

    return (PF_DROP);
}

#if INET6
int
pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
    u_short *reason, struct pf_pdesc *pd)
{
    struct mbuf *m = *m0;
    struct pf_rule *r;
    struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
    int off;
    struct ip6_ext ext;
/* adi XXX */
#if 0
    struct ip6_opt opt;
    struct ip6_opt_jumbo jumbo;
    int optend;
    int ooff;
#endif
    struct ip6_frag frag;
    u_int32_t jumbolen = 0, plen;
    u_int16_t fragoff = 0;
    u_int8_t proto;
    int terminal;
    struct pf_frent *frent;
    struct pf_fragment *pff = NULL;
    int mff = 0, rh_cnt = 0;
    u_int16_t fr_max;
    int asd = 0;
    struct pf_ruleset *ruleset = NULL;

    r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
    while (r != NULL) {
        r->evaluations++;
        if (pfi_kif_match(r->kif, kif) == r->ifnot)
            r = r->skip[PF_SKIP_IFP].ptr;
        else if
            (r->direction && r->direction != dir)
            r = r->skip[PF_SKIP_DIR].ptr;
        else if (r->af && r->af != AF_INET6)
            r = r->skip[PF_SKIP_AF].ptr;
#if 0 /* header chain! */
        else if (r->proto && r->proto != h->ip6_nxt)
            r = r->skip[PF_SKIP_PROTO].ptr;
#endif
        else if (PF_MISMATCHAW(&r->src.addr,
            (struct pf_addr *)&h->ip6_src, AF_INET6,
            r->src.neg, kif))
            r = r->skip[PF_SKIP_SRC_ADDR].ptr;
        else if (PF_MISMATCHAW(&r->dst.addr,
            (struct pf_addr *)&h->ip6_dst, AF_INET6,
            r->dst.neg, NULL))
            r = r->skip[PF_SKIP_DST_ADDR].ptr;
        else {
            if (r->anchor == NULL)
                break;
            else
                pf_step_into_anchor(&asd, &ruleset,
                    PF_RULESET_SCRUB, &r, NULL, NULL);
        }
        if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
            PF_RULESET_SCRUB, &r, NULL, NULL))
            break;
    }

    if (r == NULL || r->action == PF_NOSCRUB)
        return (PF_PASS);
    else {
        r->packets[dir == PF_OUT]++;
        r->bytes[dir == PF_OUT] += pd->tot_len;
    }

    /* Check for illegal packets */
    if ((int)(sizeof (struct ip6_hdr) + IPV6_MAXPACKET) < m->m_pkthdr.len)
        goto drop;

    off = sizeof (struct ip6_hdr);
    proto = h->ip6_nxt;
    terminal = 0;
    do {
        pd->proto = proto;
        switch (proto) {
        case IPPROTO_FRAGMENT:
            goto fragment;
            break;
        case IPPROTO_AH:
        case IPPROTO_ROUTING:
        case IPPROTO_DSTOPTS:
            if (!pf_pull_hdr(m, off, &ext, sizeof (ext), NULL,
                NULL, AF_INET6))
                goto shortpkt;
            /*
             * <jhw@apple.com>
             * Multiple routing headers not allowed.
             * Routing header type zero considered harmful.
             */
            if (proto == IPPROTO_ROUTING) {
                const struct ip6_rthdr *rh =
                    (const struct ip6_rthdr *)&ext;
                if (rh_cnt++)
                    goto drop;
                if (rh->ip6r_type == IPV6_RTHDR_TYPE_0)
                    goto drop;
            }
            else
            if (proto == IPPROTO_AH)
                off += (ext.ip6e_len + 2) * 4;
            else
                off += (ext.ip6e_len + 1) * 8;
            proto = ext.ip6e_nxt;
            break;
        case IPPROTO_HOPOPTS:
/* adi XXX */
#if 0
            if (!pf_pull_hdr(m, off, &ext, sizeof (ext), NULL,
                NULL, AF_INET6))
                goto shortpkt;
            optend = off + (ext.ip6e_len + 1) * 8;
            ooff = off + sizeof (ext);
            do {
                if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
                    sizeof (opt.ip6o_type), NULL, NULL,
                    AF_INET6))
                    goto shortpkt;
                if (opt.ip6o_type == IP6OPT_PAD1) {
                    ooff++;
                    continue;
                }
                if (!pf_pull_hdr(m, ooff, &opt, sizeof (opt),
                    NULL, NULL, AF_INET6))
                    goto shortpkt;
                if (ooff + sizeof (opt) + opt.ip6o_len > optend)
                    goto drop;
                switch (opt.ip6o_type) {
                case IP6OPT_JUMBO:
                    if (h->ip6_plen != 0)
                        goto drop;
                    if (!pf_pull_hdr(m, ooff, &jumbo,
                        sizeof (jumbo), NULL, NULL,
                        AF_INET6))
                        goto shortpkt;
                    memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
                        sizeof (jumbolen));
                    jumbolen = ntohl(jumbolen);
                    if (jumbolen <= IPV6_MAXPACKET)
                        goto drop;
                    if (sizeof (struct ip6_hdr) +
                        jumbolen != m->m_pkthdr.len)
                        goto drop;
                    break;
                default:
                    break;
                }
                ooff += sizeof (opt) + opt.ip6o_len;
            } while (ooff < optend);

            off = optend;
            proto = ext.ip6e_nxt;
            break;
#endif
        default:
            terminal = 1;
            break;
        }
    } while (!terminal);

    /* jumbo payload option must be present, or plen > 0 */
    if (ntohs(h->ip6_plen) == 0)
        plen = jumbolen;
    else
        plen = ntohs(h->ip6_plen);
    if (plen == 0)
goto drop; 1968 if ((int)(sizeof (struct ip6_hdr) + plen) > m->m_pkthdr.len) 1969 goto shortpkt; 1970 1971 /* Enforce a minimum ttl, may cause endless packet loops */ 1972 if (r->min_ttl && h->ip6_hlim < r->min_ttl) 1973 h->ip6_hlim = r->min_ttl; 1974 1975 return (PF_PASS); 1976 1977fragment: 1978 if (ntohs(h->ip6_plen) == 0 || jumbolen) 1979 goto drop; 1980 plen = ntohs(h->ip6_plen); 1981 1982 if (!pf_pull_hdr(m, off, &frag, sizeof (frag), NULL, NULL, AF_INET6)) 1983 goto shortpkt; 1984 fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK); 1985 pd->proto = frag.ip6f_nxt; 1986 mff = ntohs(frag.ip6f_offlg & IP6F_MORE_FRAG); 1987 off += sizeof frag; 1988 if (fragoff + (plen - off) > IPV6_MAXPACKET) 1989 goto badfrag; 1990 1991 fr_max = fragoff + plen - (off - sizeof(struct ip6_hdr)); 1992 DPFPRINTF(("0x%llx IPv6 frag plen %u mff %d off %u fragoff %u " 1993 "fr_max %u\n", (uint64_t)VM_KERNEL_ADDRPERM(m), plen, mff, off, 1994 fragoff, fr_max)); 1995 1996 if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) { 1997 /* Fully buffer all of the fragments */ 1998 pd->flags |= PFDESC_IP_REAS; 1999 2000 pff = pf_find_fragment_by_ipv6_header(h, &frag, 2001 &pf_frag_tree); 2002 2003 /* Check if we saw the last fragment already */ 2004 if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) && 2005 fr_max > pff->fr_max) 2006 goto badfrag; 2007 2008 /* Get an entry for the fragment queue */ 2009 frent = pool_get(&pf_frent_pl, PR_NOWAIT); 2010 if (frent == NULL) { 2011 REASON_SET(reason, PFRES_MEMORY); 2012 return (PF_DROP); 2013 } 2014 pf_nfrents++; 2015 frent->fr_ip6 = h; 2016 frent->fr_m = m; 2017 frent->fr_ip6f_opt = frag; 2018 frent->fr_ip6f_hlen = off; 2019 2020 /* Might return a completely reassembled mbuf, or NULL */ 2021 DPFPRINTF(("reass IPv6 frag %d @ %d-%d\n", 2022 ntohl(frag.ip6f_ident), fragoff, fr_max)); 2023 *m0 = m = pf_reassemble6(m0, &pff, frent, mff); 2024 2025 if (m == NULL) 2026 return (PF_DROP); 2027 2028 if (pff != NULL && (pff->fr_flags & PFFRAG_DROP)) 2029 goto drop; 2030 2031 h = mtod(m, struct ip6_hdr *); 2032 } 2033 else if (dir == PF_IN || !(pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) { 2034 /* non-buffering fragment cache (overlaps: see RFC 5722) */ 2035 int nomem = 0; 2036 2037 pff = pf_find_fragment_by_ipv6_header(h, &frag, 2038 &pf_cache_tree); 2039 2040 /* Check if we saw the last fragment already */ 2041 if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) && 2042 fr_max > pff->fr_max) { 2043 if (r->rule_flag & PFRULE_FRAGDROP) 2044 pff->fr_flags |= PFFRAG_DROP; 2045 goto badfrag; 2046 } 2047 2048 *m0 = m = pf_frag6cache(m0, h, &frag, &pff, off, mff, 2049 (r->rule_flag & PFRULE_FRAGDROP) ? 
1 : 0, &nomem); 2050 if (m == NULL) { 2051 if (nomem) 2052 goto no_mem; 2053 goto drop; 2054 } 2055 2056 if (dir == PF_IN) 2057 pd->pf_mtag->pftag_flags |= PF_TAG_FRAGCACHE; 2058 2059 if (pff != NULL && (pff->fr_flags & PFFRAG_DROP)) 2060 goto drop; 2061 } 2062 2063 /* Enforce a minimum ttl, may cause endless packet loops */ 2064 if (r->min_ttl && h->ip6_hlim < r->min_ttl) 2065 h->ip6_hlim = r->min_ttl; 2066 return (PF_PASS); 2067 2068 no_mem: 2069 REASON_SET(reason, PFRES_MEMORY); 2070 goto dropout; 2071 2072 shortpkt: 2073 REASON_SET(reason, PFRES_SHORT); 2074 goto dropout; 2075 2076 drop: 2077 REASON_SET(reason, PFRES_NORM); 2078 goto dropout; 2079 2080 badfrag: 2081 DPFPRINTF(("dropping bad IPv6 fragment\n")); 2082 REASON_SET(reason, PFRES_FRAG); 2083 goto dropout; 2084 2085 dropout: 2086 if (pff != NULL) 2087 pf_free_fragment(pff); 2088 if (r != NULL && r->log) 2089 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd); 2090 return (PF_DROP); 2091} 2092#endif /* INET6 */ 2093 2094int 2095pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, 2096 int off, void *h, struct pf_pdesc *pd) 2097{ 2098#pragma unused(ipoff, h) 2099 struct pf_rule *r, *rm = NULL; 2100 struct tcphdr *th = pd->hdr.tcp; 2101 int rewrite = 0; 2102 int asd = 0; 2103 u_short reason; 2104 u_int8_t flags; 2105 sa_family_t af = pd->af; 2106 struct pf_ruleset *ruleset = NULL; 2107 union pf_state_xport sxport, dxport; 2108 2109 sxport.port = th->th_sport; 2110 dxport.port = th->th_dport; 2111 2112 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); 2113 while (r != NULL) { 2114 r->evaluations++; 2115 if (pfi_kif_match(r->kif, kif) == r->ifnot) 2116 r = r->skip[PF_SKIP_IFP].ptr; 2117 else if (r->direction && r->direction != dir) 2118 r = r->skip[PF_SKIP_DIR].ptr; 2119 else if (r->af && r->af != af) 2120 r = r->skip[PF_SKIP_AF].ptr; 2121 else if (r->proto && r->proto != pd->proto) 2122 r = r->skip[PF_SKIP_PROTO].ptr; 2123 else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, 2124 r->src.neg, kif)) 2125 r = r->skip[PF_SKIP_SRC_ADDR].ptr; 2126 else if (r->src.xport.range.op && 2127 !pf_match_xport(r->src.xport.range.op, r->proto_variant, 2128 &r->src.xport, &sxport)) 2129 r = r->skip[PF_SKIP_SRC_PORT].ptr; 2130 else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, 2131 r->dst.neg, NULL)) 2132 r = r->skip[PF_SKIP_DST_ADDR].ptr; 2133 else if (r->dst.xport.range.op && 2134 !pf_match_xport(r->dst.xport.range.op, r->proto_variant, 2135 &r->dst.xport, &dxport)) 2136 r = r->skip[PF_SKIP_DST_PORT].ptr; 2137 else if (r->os_fingerprint != PF_OSFP_ANY && 2138 !pf_osfp_match(pf_osfp_fingerprint(pd, m, off, th), 2139 r->os_fingerprint)) 2140 r = TAILQ_NEXT(r, entries); 2141 else { 2142 if (r->anchor == NULL) { 2143 rm = r; 2144 break; 2145 } else { 2146 pf_step_into_anchor(&asd, &ruleset, 2147 PF_RULESET_SCRUB, &r, NULL, NULL); 2148 } 2149 } 2150 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, 2151 PF_RULESET_SCRUB, &r, NULL, NULL)) 2152 break; 2153 } 2154 2155 if (rm == NULL || rm->action == PF_NOSCRUB) 2156 return (PF_PASS); 2157 else { 2158 r->packets[dir == PF_OUT]++; 2159 r->bytes[dir == PF_OUT] += pd->tot_len; 2160 } 2161 2162 if (rm->rule_flag & PFRULE_REASSEMBLE_TCP) 2163 pd->flags |= PFDESC_TCP_NORM; 2164 2165 flags = th->th_flags; 2166 if (flags & TH_SYN) { 2167 /* Illegal packet */ 2168 if (flags & TH_RST) 2169 goto tcp_drop; 2170 2171 if (flags & TH_FIN) 2172 flags &= ~TH_FIN; 2173 } else { 2174 /* Illegal packet */ 2175 if (!(flags & (TH_ACK|TH_RST))) 2176 goto tcp_drop; 2177 } 
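	/*
	 * The flag scrubbing above and just below boils down to a small
	 * set of rules.  A minimal sketch, using a hypothetical helper
	 * name and shown for illustration only (the code here operates
	 * on the local 'flags' copy in place):
	 *
	 *	static int
	 *	pf_scrub_tcp_flags(u_int8_t flags)
	 *	{
	 *		if (flags & TH_SYN) {
	 *			if (flags & TH_RST)
	 *				return (-1);	(SYN+RST: drop)
	 *			flags &= ~TH_FIN;	(SYN+FIN: clear FIN)
	 *		} else if (!(flags & (TH_ACK | TH_RST)))
	 *			return (-1);		(need ACK or RST)
	 *		if (!(flags & TH_ACK) &&
	 *		    (flags & (TH_FIN | TH_PUSH | TH_URG)))
	 *			return (-1);		(these require ACK)
	 *		return (flags);			(scrubbed flag set)
	 *	}
	 */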
2178 2179 if (!(flags & TH_ACK)) { 2180 /* These flags are only valid if ACK is set */ 2181 if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG)) 2182 goto tcp_drop; 2183 } 2184 2185 /* Check for illegal header length */ 2186 if (th->th_off < (sizeof (struct tcphdr) >> 2)) 2187 goto tcp_drop; 2188 2189 /* If flags changed, or reserved data set, then adjust */ 2190 if (flags != th->th_flags || th->th_x2 != 0) { 2191 u_int16_t ov, nv; 2192 2193 ov = *(u_int16_t *)(&th->th_ack + 1); 2194 th->th_flags = flags; 2195 th->th_x2 = 0; 2196 nv = *(u_int16_t *)(&th->th_ack + 1); 2197 2198 th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0); 2199 rewrite = 1; 2200 } 2201 2202 /* Remove urgent pointer, if TH_URG is not set */ 2203 if (!(flags & TH_URG) && th->th_urp) { 2204 th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0); 2205 th->th_urp = 0; 2206 rewrite = 1; 2207 } 2208 2209 /* copy back packet headers if we sanitized */ 2210 /* Process options */ 2211 if (r->max_mss) { 2212 int rv = pf_normalize_tcpopt(r, dir, kif, pd, m, th, off, 2213 &rewrite); 2214 if (rv == PF_DROP) 2215 return rv; 2216 m = pd->mp; 2217 } 2218 2219 if (rewrite) { 2220 struct mbuf *mw = pf_lazy_makewritable(pd, m, 2221 off + sizeof (*th)); 2222 if (!mw) { 2223 REASON_SET(&reason, PFRES_MEMORY); 2224 if (r->log) 2225 PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, 2226 r, 0, 0, pd); 2227 return PF_DROP; 2228 } 2229 2230 m_copyback(mw, off, sizeof (*th), th); 2231 } 2232 2233 return (PF_PASS); 2234 2235tcp_drop: 2236 REASON_SET(&reason, PFRES_NORM); 2237 if (rm != NULL && r->log) 2238 PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL, pd); 2239 return (PF_DROP); 2240} 2241 2242int 2243pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, 2244 struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst) 2245{ 2246#pragma unused(dst) 2247 u_int32_t tsval, tsecr; 2248 u_int8_t hdr[60]; 2249 u_int8_t *opt; 2250 2251 VERIFY(src->scrub == NULL); 2252 2253 src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); 2254 if (src->scrub == NULL) 2255 return (1); 2256 bzero(src->scrub, sizeof (*src->scrub)); 2257 2258 switch (pd->af) { 2259#if INET 2260 case AF_INET: { 2261 struct ip *h = mtod(m, struct ip *); 2262 src->scrub->pfss_ttl = h->ip_ttl; 2263 break; 2264 } 2265#endif /* INET */ 2266#if INET6 2267 case AF_INET6: { 2268 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 2269 src->scrub->pfss_ttl = h->ip6_hlim; 2270 break; 2271 } 2272#endif /* INET6 */ 2273 } 2274 2275 2276 /* 2277 * All normalizations below are only begun if we see the start of 2278 * the connections. 
They must all set an enabled bit in pfss_flags 2279 */ 2280 if ((th->th_flags & TH_SYN) == 0) 2281 return (0); 2282 2283 2284 if (th->th_off > (sizeof (struct tcphdr) >> 2) && src->scrub && 2285 pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { 2286 /* Diddle with TCP options */ 2287 int hlen; 2288 opt = hdr + sizeof (struct tcphdr); 2289 hlen = (th->th_off << 2) - sizeof (struct tcphdr); 2290 while (hlen >= TCPOLEN_TIMESTAMP) { 2291 switch (*opt) { 2292 case TCPOPT_EOL: /* FALLTHROUGH */ 2293 case TCPOPT_NOP: 2294 opt++; 2295 hlen--; 2296 break; 2297 case TCPOPT_TIMESTAMP: 2298 if (opt[1] >= TCPOLEN_TIMESTAMP) { 2299 src->scrub->pfss_flags |= 2300 PFSS_TIMESTAMP; 2301 src->scrub->pfss_ts_mod = 2302 htonl(random()); 2303 2304 /* note PFSS_PAWS not set yet */ 2305 memcpy(&tsval, &opt[2], 2306 sizeof (u_int32_t)); 2307 memcpy(&tsecr, &opt[6], 2308 sizeof (u_int32_t)); 2309 src->scrub->pfss_tsval0 = ntohl(tsval); 2310 src->scrub->pfss_tsval = ntohl(tsval); 2311 src->scrub->pfss_tsecr = ntohl(tsecr); 2312 getmicrouptime(&src->scrub->pfss_last); 2313 } 2314 /* FALLTHROUGH */ 2315 default: 2316 hlen -= MAX(opt[1], 2); 2317 opt += MAX(opt[1], 2); 2318 break; 2319 } 2320 } 2321 } 2322 2323 return (0); 2324} 2325 2326void 2327pf_normalize_tcp_cleanup(struct pf_state *state) 2328{ 2329 if (state->src.scrub) 2330 pool_put(&pf_state_scrub_pl, state->src.scrub); 2331 if (state->dst.scrub) 2332 pool_put(&pf_state_scrub_pl, state->dst.scrub); 2333 2334 /* Someday... flush the TCP segment reassembly descriptors. */ 2335} 2336 2337int 2338pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, 2339 u_short *reason, struct tcphdr *th, struct pf_state *state, 2340 struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback) 2341{ 2342 struct timeval uptime; 2343 u_int32_t tsval, tsecr; 2344 u_int tsval_from_last; 2345 u_int8_t hdr[60]; 2346 u_int8_t *opt; 2347 int copyback = 0; 2348 int got_ts = 0; 2349 2350 VERIFY(src->scrub || dst->scrub); 2351 2352 /* 2353 * Enforce the minimum TTL seen for this connection. Negate a common 2354 * technique to evade an intrusion detection system and confuse 2355 * firewall state code. 2356 */ 2357 switch (pd->af) { 2358#if INET 2359 case AF_INET: { 2360 if (src->scrub) { 2361 struct ip *h = mtod(m, struct ip *); 2362 if (h->ip_ttl > src->scrub->pfss_ttl) 2363 src->scrub->pfss_ttl = h->ip_ttl; 2364 h->ip_ttl = src->scrub->pfss_ttl; 2365 } 2366 break; 2367 } 2368#endif /* INET */ 2369#if INET6 2370 case AF_INET6: { 2371 if (src->scrub) { 2372 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 2373 if (h->ip6_hlim > src->scrub->pfss_ttl) 2374 src->scrub->pfss_ttl = h->ip6_hlim; 2375 h->ip6_hlim = src->scrub->pfss_ttl; 2376 } 2377 break; 2378 } 2379#endif /* INET6 */ 2380 } 2381 2382 if (th->th_off > (sizeof (struct tcphdr) >> 2) && 2383 ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) || 2384 (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) && 2385 pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { 2386 /* Diddle with TCP options */ 2387 int hlen; 2388 opt = hdr + sizeof (struct tcphdr); 2389 hlen = (th->th_off << 2) - sizeof (struct tcphdr); 2390 while (hlen >= TCPOLEN_TIMESTAMP) { 2391 switch (*opt) { 2392 case TCPOPT_EOL: /* FALLTHROUGH */ 2393 case TCPOPT_NOP: 2394 opt++; 2395 hlen--; 2396 break; 2397 case TCPOPT_TIMESTAMP: 2398 /* 2399 * Modulate the timestamps. Can be used for 2400 * NAT detection, OS uptime determination or 2401 * reboot detection. 2402 */ 2403 2404 if (got_ts) { 2405 /* Huh? 
Multiple timestamps!? */ 2406 if (pf_status.debug >= PF_DEBUG_MISC) { 2407 DPFPRINTF(("multiple TS??")); 2408 pf_print_state(state); 2409 printf("\n"); 2410 } 2411 REASON_SET(reason, PFRES_TS); 2412 return (PF_DROP); 2413 } 2414 if (opt[1] >= TCPOLEN_TIMESTAMP) { 2415 memcpy(&tsval, &opt[2], 2416 sizeof (u_int32_t)); 2417 if (tsval && src->scrub && 2418 (src->scrub->pfss_flags & 2419 PFSS_TIMESTAMP)) { 2420 tsval = ntohl(tsval); 2421 pf_change_a(&opt[2], 2422 &th->th_sum, 2423 htonl(tsval + 2424 src->scrub->pfss_ts_mod), 2425 0); 2426 copyback = 1; 2427 } 2428 2429 /* Modulate TS reply iff valid (!0) */ 2430 memcpy(&tsecr, &opt[6], 2431 sizeof (u_int32_t)); 2432 if (tsecr && dst->scrub && 2433 (dst->scrub->pfss_flags & 2434 PFSS_TIMESTAMP)) { 2435 tsecr = ntohl(tsecr) 2436 - dst->scrub->pfss_ts_mod; 2437 pf_change_a(&opt[6], 2438 &th->th_sum, htonl(tsecr), 2439 0); 2440 copyback = 1; 2441 } 2442 got_ts = 1; 2443 } 2444 /* FALLTHROUGH */ 2445 default: 2446 hlen -= MAX(opt[1], 2); 2447 opt += MAX(opt[1], 2); 2448 break; 2449 } 2450 } 2451 if (copyback) { 2452 /* Copyback the options, caller copys back header */ 2453 int optoff = off + sizeof (*th); 2454 int optlen = (th->th_off << 2) - sizeof (*th); 2455 m = pf_lazy_makewritable(pd, m, optoff + optlen); 2456 if (!m) { 2457 REASON_SET(reason, PFRES_MEMORY); 2458 return PF_DROP; 2459 } 2460 *writeback = optoff + optlen; 2461 m_copyback(m, optoff, optlen, hdr + sizeof (*th)); 2462 } 2463 } 2464 2465 2466 /* 2467 * Must invalidate PAWS checks on connections idle for too long. 2468 * The fastest allowed timestamp clock is 1ms. That turns out to 2469 * be about 24 days before it wraps. XXX Right now our lowerbound 2470 * TS echo check only works for the first 12 days of a connection 2471 * when the TS has exhausted half its 32bit space 2472 */ 2473#define TS_MAX_IDLE (24*24*60*60) 2474#define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */ 2475 2476 getmicrouptime(&uptime); 2477 if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) && 2478 (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE || 2479 pf_time_second() - state->creation > TS_MAX_CONN)) { 2480 if (pf_status.debug >= PF_DEBUG_MISC) { 2481 DPFPRINTF(("src idled out of PAWS\n")); 2482 pf_print_state(state); 2483 printf("\n"); 2484 } 2485 src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS) 2486 | PFSS_PAWS_IDLED; 2487 } 2488 if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) && 2489 uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) { 2490 if (pf_status.debug >= PF_DEBUG_MISC) { 2491 DPFPRINTF(("dst idled out of PAWS\n")); 2492 pf_print_state(state); 2493 printf("\n"); 2494 } 2495 dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS) 2496 | PFSS_PAWS_IDLED; 2497 } 2498 2499 if (got_ts && src->scrub && dst->scrub && 2500 (src->scrub->pfss_flags & PFSS_PAWS) && 2501 (dst->scrub->pfss_flags & PFSS_PAWS)) { 2502 /* 2503 * Validate that the timestamps are "in-window". 2504 * RFC1323 describes TCP Timestamp options that allow 2505 * measurement of RTT (round trip time) and PAWS 2506 * (protection against wrapped sequence numbers). PAWS 2507 * gives us a set of rules for rejecting packets on 2508 * long fat pipes (packets that were somehow delayed 2509 * in transit longer than the time it took to send the 2510 * full TCP sequence space of 4Gb). 
We can use these
		 * rules and infer a few others that will let us treat
		 * the 32bit timestamp and the 32bit echoed timestamp
		 * as sequence numbers to prevent a blind attacker from
		 * inserting packets into a connection.
		 *
		 * RFC1323 tells us:
		 *  - The timestamp on this packet must be greater than
		 *    or equal to the last value echoed by the other
		 *    endpoint.  The RFC says those will be discarded
		 *    since it is a dup that has already been acked.
		 *    This gives us a lowerbound on the timestamp.
		 *        timestamp >= other last echoed timestamp
		 *  - The timestamp will be less than or equal to
		 *    the last timestamp plus the time between the
		 *    last packet and now.  The RFC defines the max
		 *    clock rate as 1ms.  We will allow clocks to be
		 *    up to 10% fast and will allow a total difference
		 *    of 30 seconds due to a route change.  And this
		 *    gives us an upperbound on the timestamp.
		 *        timestamp <= last timestamp + max ticks
		 *    We have to be careful here.  Windows will send an
		 *    initial timestamp of zero and then initialize it
		 *    to a random value after the 3whs; presumably to
		 *    avoid a DoS by having to call an expensive RNG
		 *    during a SYN flood.  Proof that MS has at least one
		 *    good security geek.
		 *
		 *  - The TCP timestamp option must also echo the other
		 *    endpoint's timestamp.  The timestamp echoed is the
		 *    one carried on the earliest unacknowledged segment
		 *    on the left edge of the sequence window.  The RFC
		 *    states that the host will reject any echoed
		 *    timestamps that were larger than any ever sent.
		 *    This gives us an upperbound on the TS echo.
		 *        tsecr <= largest_tsval
		 *  - The lowerbound on the TS echo is a little more
		 *    tricky to determine.  The other endpoint's echoed
		 *    values will not decrease.  But there may be
		 *    network conditions that re-order packets and
		 *    cause our view of them to decrease.  For now the
		 *    only lowerbound we can safely determine is that
		 *    the TS echo will never be less than the original
		 *    TS.  XXX There is probably a better lowerbound.
		 *    Remove TS_MAX_CONN with better lowerbound check.
		 *        tsecr >= other original TS
		 *
		 * It is also important to note that the fastest
		 * timestamp clock of 1ms will wrap its 32bit space in
		 * 24 days.  So we just disable TS checking after 24
		 * days of idle time.  We actually must use a 12d
		 * connection limit until we can come up with a better
		 * lowerbound to the TS echo check.
		 */
		struct timeval delta_ts;
		int ts_fudge;


		/*
		 * PFTM_TS_DIFF is how many seconds of leeway to allow
		 * a host's timestamp.  This can happen if the previous
		 * packet got delayed in transit for much longer than
		 * this packet.
2573 */ 2574 if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0) 2575 ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF]; 2576 2577 2578 /* Calculate max ticks since the last timestamp */ 2579#define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */ 2580#define TS_MICROSECS 1000000 /* microseconds per second */ 2581 timersub(&uptime, &src->scrub->pfss_last, &delta_ts); 2582 tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ; 2583 tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ); 2584 2585 2586 if ((src->state >= TCPS_ESTABLISHED && 2587 dst->state >= TCPS_ESTABLISHED) && 2588 (SEQ_LT(tsval, dst->scrub->pfss_tsecr) || 2589 SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) || 2590 (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) || 2591 SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) { 2592 /* 2593 * Bad RFC1323 implementation or an insertion attack. 2594 * 2595 * - Solaris 2.6 and 2.7 are known to send another ACK 2596 * after the FIN,FIN|ACK,ACK closing that carries 2597 * an old timestamp. 2598 */ 2599 2600 DPFPRINTF(("Timestamp failed %c%c%c%c\n", 2601 SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ', 2602 SEQ_GT(tsval, src->scrub->pfss_tsval + 2603 tsval_from_last) ? '1' : ' ', 2604 SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ', 2605 SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' ')); 2606 DPFPRINTF((" tsval: %u tsecr: %u +ticks: %u " 2607 "idle: %lus %ums\n", 2608 tsval, tsecr, tsval_from_last, delta_ts.tv_sec, 2609 delta_ts.tv_usec / 1000)); 2610 DPFPRINTF((" src->tsval: %u tsecr: %u\n", 2611 src->scrub->pfss_tsval, src->scrub->pfss_tsecr)); 2612 DPFPRINTF((" dst->tsval: %u tsecr: %u tsval0: %u\n", 2613 dst->scrub->pfss_tsval, dst->scrub->pfss_tsecr, 2614 dst->scrub->pfss_tsval0)); 2615 if (pf_status.debug >= PF_DEBUG_MISC) { 2616 pf_print_state(state); 2617 pf_print_flags(th->th_flags); 2618 printf("\n"); 2619 } 2620 REASON_SET(reason, PFRES_TS); 2621 return (PF_DROP); 2622 } 2623 2624 /* XXX I'd really like to require tsecr but it's optional */ 2625 2626 } else if (!got_ts && (th->th_flags & TH_RST) == 0 && 2627 ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED) 2628 || pd->p_len > 0 || (th->th_flags & TH_SYN)) && 2629 src->scrub && dst->scrub && 2630 (src->scrub->pfss_flags & PFSS_PAWS) && 2631 (dst->scrub->pfss_flags & PFSS_PAWS)) { 2632 /* 2633 * Didn't send a timestamp. Timestamps aren't really useful 2634 * when: 2635 * - connection opening or closing (often not even sent). 2636 * but we must not let an attacker to put a FIN on a 2637 * data packet to sneak it through our ESTABLISHED check. 2638 * - on a TCP reset. RFC suggests not even looking at TS. 2639 * - on an empty ACK. The TS will not be echoed so it will 2640 * probably not help keep the RTT calculation in sync and 2641 * there isn't as much danger when the sequence numbers 2642 * got wrapped. So some stacks don't include TS on empty 2643 * ACKs :-( 2644 * 2645 * To minimize the disruption to mostly RFC1323 conformant 2646 * stacks, we will only require timestamps on data packets. 2647 * 2648 * And what do ya know, we cannot require timestamps on data 2649 * packets. There appear to be devices that do legitimate 2650 * TCP connection hijacking. There are HTTP devices that allow 2651 * a 3whs (with timestamps) and then buffer the HTTP request. 2652 * If the intermediate device has the HTTP response cache, it 2653 * will spoof the response but not bother timestamping its 2654 * packets. 
So we can look for the presence of a timestamp in 2655 * the first data packet and if there, require it in all future 2656 * packets. 2657 */ 2658 2659 if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) { 2660 /* 2661 * Hey! Someone tried to sneak a packet in. Or the 2662 * stack changed its RFC1323 behavior?!?! 2663 */ 2664 if (pf_status.debug >= PF_DEBUG_MISC) { 2665 DPFPRINTF(("Did not receive expected RFC1323 " 2666 "timestamp\n")); 2667 pf_print_state(state); 2668 pf_print_flags(th->th_flags); 2669 printf("\n"); 2670 } 2671 REASON_SET(reason, PFRES_TS); 2672 return (PF_DROP); 2673 } 2674 } 2675 2676 2677 /* 2678 * We will note if a host sends his data packets with or without 2679 * timestamps. And require all data packets to contain a timestamp 2680 * if the first does. PAWS implicitly requires that all data packets be 2681 * timestamped. But I think there are middle-man devices that hijack 2682 * TCP streams immediately after the 3whs and don't timestamp their 2683 * packets (seen in a WWW accelerator or cache). 2684 */ 2685 if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags & 2686 (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) { 2687 if (got_ts) 2688 src->scrub->pfss_flags |= PFSS_DATA_TS; 2689 else { 2690 src->scrub->pfss_flags |= PFSS_DATA_NOTS; 2691 if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub && 2692 (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { 2693 /* Don't warn if other host rejected RFC1323 */ 2694 DPFPRINTF(("Broken RFC1323 stack did not " 2695 "timestamp data packet. Disabled PAWS " 2696 "security.\n")); 2697 pf_print_state(state); 2698 pf_print_flags(th->th_flags); 2699 printf("\n"); 2700 } 2701 } 2702 } 2703 2704 2705 /* 2706 * Update PAWS values 2707 */ 2708 if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags & 2709 (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) { 2710 getmicrouptime(&src->scrub->pfss_last); 2711 if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) || 2712 (src->scrub->pfss_flags & PFSS_PAWS) == 0) 2713 src->scrub->pfss_tsval = tsval; 2714 2715 if (tsecr) { 2716 if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) || 2717 (src->scrub->pfss_flags & PFSS_PAWS) == 0) 2718 src->scrub->pfss_tsecr = tsecr; 2719 2720 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 && 2721 (SEQ_LT(tsval, src->scrub->pfss_tsval0) || 2722 src->scrub->pfss_tsval0 == 0)) { 2723 /* tsval0 MUST be the lowest timestamp */ 2724 src->scrub->pfss_tsval0 = tsval; 2725 } 2726 2727 /* Only fully initialized after a TS gets echoed */ 2728 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0) 2729 src->scrub->pfss_flags |= PFSS_PAWS; 2730 } 2731 } 2732 2733 /* I have a dream.... TCP segment reassembly.... 
*/ 2734 return (0); 2735} 2736 2737static int 2738pf_normalize_tcpopt(struct pf_rule *r, int dir, struct pfi_kif *kif, 2739 struct pf_pdesc *pd, struct mbuf *m, struct tcphdr *th, int off, 2740 int *rewrptr) 2741{ 2742#pragma unused(dir, kif) 2743 sa_family_t af = pd->af; 2744 u_int16_t *mss; 2745 int thoff; 2746 int opt, cnt, optlen = 0; 2747 int rewrite = 0; 2748 u_char opts[MAX_TCPOPTLEN]; 2749 u_char *optp = opts; 2750 2751 thoff = th->th_off << 2; 2752 cnt = thoff - sizeof (struct tcphdr); 2753 2754 if (cnt > 0 && !pf_pull_hdr(m, off + sizeof (*th), opts, cnt, 2755 NULL, NULL, af)) 2756 return PF_DROP; 2757 2758 for (; cnt > 0; cnt -= optlen, optp += optlen) { 2759 opt = optp[0]; 2760 if (opt == TCPOPT_EOL) 2761 break; 2762 if (opt == TCPOPT_NOP) 2763 optlen = 1; 2764 else { 2765 if (cnt < 2) 2766 break; 2767 optlen = optp[1]; 2768 if (optlen < 2 || optlen > cnt) 2769 break; 2770 } 2771 switch (opt) { 2772 case TCPOPT_MAXSEG: 2773 mss = (u_int16_t *)(void *)(optp + 2); 2774 if ((ntohs(*mss)) > r->max_mss) { 2775 /* 2776 * <jhw@apple.com> 2777 * Only do the TCP checksum fixup if delayed 2778 * checksum calculation will not be performed. 2779 */ 2780 if (m->m_pkthdr.rcvif || 2781 !(m->m_pkthdr.csum_flags & CSUM_TCP)) 2782 th->th_sum = pf_cksum_fixup(th->th_sum, 2783 *mss, htons(r->max_mss), 0); 2784 *mss = htons(r->max_mss); 2785 rewrite = 1; 2786 } 2787 break; 2788 default: 2789 break; 2790 } 2791 } 2792 2793 if (rewrite) { 2794 struct mbuf *mw; 2795 u_short reason; 2796 2797 mw = pf_lazy_makewritable(pd, pd->mp, 2798 off + sizeof (*th) + thoff); 2799 if (!mw) { 2800 REASON_SET(&reason, PFRES_MEMORY); 2801 if (r->log) 2802 PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, 2803 r, 0, 0, pd); 2804 return PF_DROP; 2805 } 2806 2807 *rewrptr = 1; 2808 m_copyback(mw, off + sizeof (*th), thoff - sizeof (*th), opts); 2809 } 2810 2811 return PF_PASS; 2812} 2813
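
/*
 * pf_normalize_tcpopt() above rewrites the MSS option in place and then
 * repairs the TCP checksum with pf_cksum_fixup().  That helper (defined
 * elsewhere in pf) applies the standard incremental ones-complement
 * update of RFC 1624: the old 16-bit value is folded out of the checksum
 * and the new value folded in, so the segment never has to be summed
 * again in full.  A minimal sketch of the core adjustment, under a
 * hypothetical name (the real helper also special-cases UDP checksums):
 *
 *	static u_int16_t
 *	cksum_adjust(u_int16_t cksum, u_int16_t old, u_int16_t new)
 *	{
 *		u_int32_t l = cksum + old - new;	(ones-complement sum)
 *
 *		l = (l >> 16) + (l & 0xffff);		(fold the carry)
 *		return (l & 0xffff);
 *	}
 *
 * Clamping an MSS of 1460 down to a max-mss of 1200, for example, calls
 * pf_cksum_fixup(th->th_sum, htons(1460), htons(1200), 0), which performs
 * this same adjustment on the stored checksum.
 */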