ip_fw_nat.c revision 176669
1/*- 2 * Copyright (c) 2008 Paolo Pisati 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: head/sys/netinet/ip_fw_nat.c 176669 2008-02-29 22:27:19Z piso $"); 29 30#include <sys/param.h> 31#include <sys/systm.h> 32#include <sys/condvar.h> 33#include <sys/eventhandler.h> 34#include <sys/malloc.h> 35#include <sys/mbuf.h> 36#include <sys/kernel.h> 37#include <sys/lock.h> 38#include <sys/jail.h> 39#include <sys/module.h> 40#include <sys/priv.h> 41#include <sys/proc.h> 42#include <sys/rwlock.h> 43#include <sys/socket.h> 44#include <sys/socketvar.h> 45#include <sys/sysctl.h> 46#include <sys/syslog.h> 47#include <sys/ucred.h> 48 49#include <netinet/libalias/alias.h> 50#include <netinet/libalias/alias_local.h> 51 52#define IPFW_INTERNAL /* Access to protected data structures in ip_fw.h. */ 53 54#include <net/if.h> 55#include <netinet/in.h> 56#include <netinet/ip.h> 57#include <netinet/ip_var.h> 58#include <netinet/ip_icmp.h> 59#include <netinet/ip_fw.h> 60#include <netinet/tcp.h> 61#include <netinet/tcp_timer.h> 62#include <netinet/tcp_var.h> 63#include <netinet/tcpip.h> 64#include <netinet/udp.h> 65#include <netinet/udp_var.h> 66 67#include <machine/in_cksum.h> /* XXX for in_cksum */ 68 69MALLOC_DECLARE(M_IPFW); 70 71extern struct ip_fw_chain layer3_chain; 72 73static eventhandler_tag ifaddr_event_tag; 74 75extern ipfw_nat_t *ipfw_nat_ptr; 76extern ipfw_nat_cfg_t *ipfw_nat_cfg_ptr; 77extern ipfw_nat_cfg_t *ipfw_nat_del_ptr; 78extern ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr; 79extern ipfw_nat_cfg_t *ipfw_nat_get_log_ptr; 80 81static void 82ifaddr_change(void *arg __unused, struct ifnet *ifp) 83{ 84 struct cfg_nat *ptr; 85 struct ifaddr *ifa; 86 87 IPFW_WLOCK(&layer3_chain); 88 /* Check every nat entry... */ 89 LIST_FOREACH(ptr, &layer3_chain.nat, _next) { 90 /* ...using nic 'ifp->if_xname' as dynamic alias address. */ 91 if (strncmp(ptr->if_name, ifp->if_xname, IF_NAMESIZE) == 0) { 92 mtx_lock(&ifp->if_addr_mtx); 93 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { 94 if (ifa->ifa_addr == NULL) 95 continue; 96 if (ifa->ifa_addr->sa_family != AF_INET) 97 continue; 98 ptr->ip = ((struct sockaddr_in *) 99 (ifa->ifa_addr))->sin_addr; 100 LibAliasSetAddress(ptr->lib, ptr->ip); 101 } 102 mtx_unlock(&ifp->if_addr_mtx); 103 } 104 } 105 IPFW_WUNLOCK(&layer3_chain); 106} 107 108static void 109flush_nat_ptrs(const int i) 110{ 111 struct ip_fw *rule; 112 113 IPFW_WLOCK_ASSERT(&layer3_chain); 114 for (rule = layer3_chain.rules; rule; rule = rule->next) { 115 ipfw_insn_nat *cmd = (ipfw_insn_nat *)ACTION_PTR(rule); 116 if (cmd->o.opcode != O_NAT) 117 continue; 118 if (cmd->nat != NULL && cmd->nat->id == i) 119 cmd->nat = NULL; 120 } 121} 122 123#define HOOK_NAT(b, p) do { \ 124 IPFW_WLOCK_ASSERT(&layer3_chain); \ 125 LIST_INSERT_HEAD(b, p, _next); \ 126 } while (0) 127 128#define UNHOOK_NAT(p) do { \ 129 IPFW_WLOCK_ASSERT(&layer3_chain); \ 130 LIST_REMOVE(p, _next); \ 131 } while (0) 132 133#define HOOK_REDIR(b, p) do { \ 134 LIST_INSERT_HEAD(b, p, _next); \ 135 } while (0) 136 137#define HOOK_SPOOL(b, p) do { \ 138 LIST_INSERT_HEAD(b, p, _next); \ 139 } while (0) 140 141static void 142del_redir_spool_cfg(struct cfg_nat *n, struct redir_chain *head) 143{ 144 struct cfg_redir *r, *tmp_r; 145 struct cfg_spool *s, *tmp_s; 146 int i, num; 147 148 LIST_FOREACH_SAFE(r, head, _next, tmp_r) { 149 num = 1; /* Number of alias_link to delete. */ 150 switch (r->mode) { 151 case REDIR_PORT: 152 num = r->pport_cnt; 153 /* FALLTHROUGH */ 154 case REDIR_ADDR: 155 case REDIR_PROTO: 156 /* Delete all libalias redirect entry. */ 157 for (i = 0; i < num; i++) 158 LibAliasRedirectDelete(n->lib, r->alink[i]); 159 /* Del spool cfg if any. */ 160 LIST_FOREACH_SAFE(s, &r->spool_chain, _next, tmp_s) { 161 LIST_REMOVE(s, _next); 162 free(s, M_IPFW); 163 } 164 free(r->alink, M_IPFW); 165 LIST_REMOVE(r, _next); 166 free(r, M_IPFW); 167 break; 168 default: 169 printf("unknown redirect mode: %u\n", r->mode); 170 /* XXX - panic?!?!? */ 171 break; 172 } 173 } 174} 175 176static int 177add_redir_spool_cfg(char *buf, struct cfg_nat *ptr) 178{ 179 struct cfg_redir *r, *ser_r; 180 struct cfg_spool *s, *ser_s; 181 int cnt, off, i; 182 char *panic_err; 183 184 for (cnt = 0, off = 0; cnt < ptr->redir_cnt; cnt++) { 185 ser_r = (struct cfg_redir *)&buf[off]; 186 r = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO); 187 memcpy(r, ser_r, SOF_REDIR); 188 LIST_INIT(&r->spool_chain); 189 off += SOF_REDIR; 190 r->alink = malloc(sizeof(struct alias_link *) * r->pport_cnt, 191 M_IPFW, M_WAITOK | M_ZERO); 192 switch (r->mode) { 193 case REDIR_ADDR: 194 r->alink[0] = LibAliasRedirectAddr(ptr->lib, r->laddr, 195 r->paddr); 196 break; 197 case REDIR_PORT: 198 for (i = 0 ; i < r->pport_cnt; i++) { 199 /* If remotePort is all ports, set it to 0. */ 200 u_short remotePortCopy = r->rport + i; 201 if (r->rport_cnt == 1 && r->rport == 0) 202 remotePortCopy = 0; 203 r->alink[i] = LibAliasRedirectPort(ptr->lib, 204 r->laddr, htons(r->lport + i), r->raddr, 205 htons(remotePortCopy), r->paddr, 206 htons(r->pport + i), r->proto); 207 if (r->alink[i] == NULL) { 208 r->alink[0] = NULL; 209 break; 210 } 211 } 212 break; 213 case REDIR_PROTO: 214 r->alink[0] = LibAliasRedirectProto(ptr->lib ,r->laddr, 215 r->raddr, r->paddr, r->proto); 216 break; 217 default: 218 printf("unknown redirect mode: %u\n", r->mode); 219 break; 220 } 221 if (r->alink[0] == NULL) { 222 panic_err = "LibAliasRedirect* returned NULL"; 223 goto bad; 224 } else /* LSNAT handling. */ 225 for (i = 0; i < r->spool_cnt; i++) { 226 ser_s = (struct cfg_spool *)&buf[off]; 227 s = malloc(SOF_REDIR, M_IPFW, 228 M_WAITOK | M_ZERO); 229 memcpy(s, ser_s, SOF_SPOOL); 230 LibAliasAddServer(ptr->lib, r->alink[0], 231 s->addr, htons(s->port)); 232 off += SOF_SPOOL; 233 /* Hook spool entry. */ 234 HOOK_SPOOL(&r->spool_chain, s); 235 } 236 /* And finally hook this redir entry. */ 237 HOOK_REDIR(&ptr->redir_chain, r); 238 } 239 return (1); 240bad: 241 /* something really bad happened: panic! */ 242 panic("%s\n", panic_err); 243} 244 245static int 246ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m) 247{ 248 struct mbuf *mcl; 249 struct ip *ip; 250 /* XXX - libalias duct tape */ 251 int ldt, retval; 252 char *c; 253 254 ldt = 0; 255 retval = 0; 256 if ((mcl = m_megapullup(m, m->m_pkthdr.len)) == 257 NULL) 258 goto badnat; 259 ip = mtod(mcl, struct ip *); 260 if (args->eh == NULL) { 261 ip->ip_len = htons(ip->ip_len); 262 ip->ip_off = htons(ip->ip_off); 263 } 264 265 /* 266 * XXX - Libalias checksum offload 'duct tape': 267 * 268 * locally generated packets have only 269 * pseudo-header checksum calculated 270 * and libalias will screw it[1], so 271 * mark them for later fix. Moreover 272 * there are cases when libalias 273 * modify tcp packet data[2], mark it 274 * for later fix too. 275 * 276 * [1] libalias was never meant to run 277 * in kernel, so it doesn't have any 278 * knowledge about checksum 279 * offloading, and it expects a packet 280 * with a full internet 281 * checksum. Unfortunately, packets 282 * generated locally will have just the 283 * pseudo header calculated, and when 284 * libalias tries to adjust the 285 * checksum it will actually screw it. 286 * 287 * [2] when libalias modify tcp's data 288 * content, full TCP checksum has to 289 * be recomputed: the problem is that 290 * libalias doesn't have any idea 291 * about checksum offloading To 292 * workaround this, we do not do 293 * checksumming in LibAlias, but only 294 * mark the packets in th_x2 field. If 295 * we receive a marked packet, we 296 * calculate correct checksum for it 297 * aware of offloading. Why such a 298 * terrible hack instead of 299 * recalculating checksum for each 300 * packet? Because the previous 301 * checksum was not checked! 302 * Recalculating checksums for EVERY 303 * packet will hide ALL transmission 304 * errors. Yes, marked packets still 305 * suffer from this problem. But, 306 * sigh, natd(8) has this problem, 307 * too. 308 * 309 * TODO: -make libalias mbuf aware (so 310 * it can handle delayed checksum and tso) 311 */ 312 313 if (mcl->m_pkthdr.rcvif == NULL && 314 mcl->m_pkthdr.csum_flags & 315 CSUM_DELAY_DATA) 316 ldt = 1; 317 318 c = mtod(mcl, char *); 319 if (args->oif == NULL) 320 retval = LibAliasIn(t->lib, c, 321 MCLBYTES); 322 else 323 retval = LibAliasOut(t->lib, c, 324 MCLBYTES); 325 if (retval != PKT_ALIAS_OK) { 326 /* XXX - should i add some logging? */ 327 m_free(mcl); 328 badnat: 329 args->m = NULL; 330 return (IP_FW_DENY); 331 } 332 mcl->m_pkthdr.len = mcl->m_len = 333 ntohs(ip->ip_len); 334 335 /* 336 * XXX - libalias checksum offload 337 * 'duct tape' (see above) 338 */ 339 340 if ((ip->ip_off & htons(IP_OFFMASK)) == 0 && 341 ip->ip_p == IPPROTO_TCP) { 342 struct tcphdr *th; 343 344 th = (struct tcphdr *)(ip + 1); 345 if (th->th_x2) 346 ldt = 1; 347 } 348 349 if (ldt) { 350 struct tcphdr *th; 351 struct udphdr *uh; 352 u_short cksum; 353 354 ip->ip_len = ntohs(ip->ip_len); 355 cksum = in_pseudo( 356 ip->ip_src.s_addr, 357 ip->ip_dst.s_addr, 358 htons(ip->ip_p + ip->ip_len - (ip->ip_hl << 2)) 359 ); 360 361 switch (ip->ip_p) { 362 case IPPROTO_TCP: 363 th = (struct tcphdr *)(ip + 1); 364 /* 365 * Maybe it was set in 366 * libalias... 367 */ 368 th->th_x2 = 0; 369 th->th_sum = cksum; 370 mcl->m_pkthdr.csum_data = 371 offsetof(struct tcphdr, th_sum); 372 break; 373 case IPPROTO_UDP: 374 uh = (struct udphdr *)(ip + 1); 375 uh->uh_sum = cksum; 376 mcl->m_pkthdr.csum_data = 377 offsetof(struct udphdr, uh_sum); 378 break; 379 } 380 /* 381 * No hw checksum offloading: do it 382 * by ourself. 383 */ 384 if ((mcl->m_pkthdr.csum_flags & 385 CSUM_DELAY_DATA) == 0) { 386 in_delayed_cksum(mcl); 387 mcl->m_pkthdr.csum_flags &= 388 ~CSUM_DELAY_DATA; 389 } 390 ip->ip_len = htons(ip->ip_len); 391 } 392 393 if (args->eh == NULL) { 394 ip->ip_len = ntohs(ip->ip_len); 395 ip->ip_off = ntohs(ip->ip_off); 396 } 397 398 args->m = mcl; 399 return (IP_FW_NAT); 400} 401 402static int 403ipfw_nat_cfg(struct sockopt *sopt) 404{ 405 struct cfg_nat *ptr, *ser_n; 406 char *buf; 407 408 buf = malloc(NAT_BUF_LEN, M_IPFW, M_WAITOK | M_ZERO); 409 sooptcopyin(sopt, buf, NAT_BUF_LEN, 410 sizeof(struct cfg_nat)); 411 ser_n = (struct cfg_nat *)buf; 412 413 /* 414 * Find/create nat rule. 415 */ 416 IPFW_WLOCK(&layer3_chain); 417 LOOKUP_NAT(layer3_chain, ser_n->id, ptr); 418 if (ptr == NULL) { 419 /* New rule: allocate and init new instance. */ 420 ptr = malloc(sizeof(struct cfg_nat), 421 M_IPFW, M_NOWAIT | M_ZERO); 422 if (ptr == NULL) { 423 IPFW_WUNLOCK(&layer3_chain); 424 free(buf, M_IPFW); 425 return (ENOSPC); 426 } 427 ptr->lib = LibAliasInit(NULL); 428 if (ptr->lib == NULL) { 429 IPFW_WUNLOCK(&layer3_chain); 430 free(ptr, M_IPFW); 431 free(buf, M_IPFW); 432 return (EINVAL); 433 } 434 LIST_INIT(&ptr->redir_chain); 435 } else { 436 /* Entry already present: temporarly unhook it. */ 437 UNHOOK_NAT(ptr); 438 flush_nat_ptrs(ser_n->id); 439 } 440 IPFW_WUNLOCK(&layer3_chain); 441 442 /* 443 * Basic nat configuration. 444 */ 445 ptr->id = ser_n->id; 446 /* 447 * XXX - what if this rule doesn't nat any ip and just 448 * redirect? 449 * do we set aliasaddress to 0.0.0.0? 450 */ 451 ptr->ip = ser_n->ip; 452 ptr->redir_cnt = ser_n->redir_cnt; 453 ptr->mode = ser_n->mode; 454 LibAliasSetMode(ptr->lib, ser_n->mode, ser_n->mode); 455 LibAliasSetAddress(ptr->lib, ptr->ip); 456 memcpy(ptr->if_name, ser_n->if_name, IF_NAMESIZE); 457 458 /* 459 * Redir and LSNAT configuration. 460 */ 461 /* Delete old cfgs. */ 462 del_redir_spool_cfg(ptr, &ptr->redir_chain); 463 /* Add new entries. */ 464 add_redir_spool_cfg(&buf[(sizeof(struct cfg_nat))], ptr); 465 free(buf, M_IPFW); 466 IPFW_WLOCK(&layer3_chain); 467 HOOK_NAT(&layer3_chain.nat, ptr); 468 IPFW_WUNLOCK(&layer3_chain); 469 return (0); 470} 471 472static int 473ipfw_nat_del(struct sockopt *sopt) 474{ 475 struct cfg_nat *ptr; 476 int i; 477 478 sooptcopyin(sopt, &i, sizeof i, sizeof i); 479 IPFW_WLOCK(&layer3_chain); 480 LOOKUP_NAT(layer3_chain, i, ptr); 481 if (ptr == NULL) { 482 IPFW_WUNLOCK(&layer3_chain); 483 return (EINVAL); 484 } 485 UNHOOK_NAT(ptr); 486 flush_nat_ptrs(i); 487 IPFW_WUNLOCK(&layer3_chain); 488 del_redir_spool_cfg(ptr, &ptr->redir_chain); 489 LibAliasUninit(ptr->lib); 490 free(ptr, M_IPFW); 491 return (0); 492} 493 494static int 495ipfw_nat_get_cfg(struct sockopt *sopt) 496{ 497 uint8_t *data; 498 struct cfg_nat *n; 499 struct cfg_redir *r; 500 struct cfg_spool *s; 501 int nat_cnt, off; 502 503 nat_cnt = 0; 504 off = sizeof(nat_cnt); 505 506 data = malloc(NAT_BUF_LEN, M_IPFW, M_WAITOK | M_ZERO); 507 IPFW_RLOCK(&layer3_chain); 508 /* Serialize all the data. */ 509 LIST_FOREACH(n, &layer3_chain.nat, _next) { 510 nat_cnt++; 511 if (off + SOF_NAT < NAT_BUF_LEN) { 512 bcopy(n, &data[off], SOF_NAT); 513 off += SOF_NAT; 514 LIST_FOREACH(r, &n->redir_chain, _next) { 515 if (off + SOF_REDIR < NAT_BUF_LEN) { 516 bcopy(r, &data[off], 517 SOF_REDIR); 518 off += SOF_REDIR; 519 LIST_FOREACH(s, &r->spool_chain, 520 _next) { 521 if (off + SOF_SPOOL < 522 NAT_BUF_LEN) { 523 bcopy(s, &data[off], 524 SOF_SPOOL); 525 off += SOF_SPOOL; 526 } else 527 goto nospace; 528 } 529 } else 530 goto nospace; 531 } 532 } else 533 goto nospace; 534 } 535 bcopy(&nat_cnt, data, sizeof(nat_cnt)); 536 IPFW_RUNLOCK(&layer3_chain); 537 sooptcopyout(sopt, data, NAT_BUF_LEN); 538 free(data, M_IPFW); 539 return (0); 540nospace: 541 IPFW_RUNLOCK(&layer3_chain); 542 printf("serialized data buffer not big enough:" 543 "please increase NAT_BUF_LEN\n"); 544 free(data, M_IPFW); 545 return (ENOSPC); 546} 547 548static int 549ipfw_nat_get_log(struct sockopt *sopt) 550{ 551 uint8_t *data; 552 struct cfg_nat *ptr; 553 int i, size, cnt, sof; 554 555 data = NULL; 556 sof = LIBALIAS_BUF_SIZE; 557 cnt = 0; 558 559 IPFW_RLOCK(&layer3_chain); 560 size = i = 0; 561 LIST_FOREACH(ptr, &layer3_chain.nat, _next) { 562 if (ptr->lib->logDesc == NULL) 563 continue; 564 cnt++; 565 size = cnt * (sof + sizeof(int)); 566 data = realloc(data, size, M_IPFW, M_NOWAIT | M_ZERO); 567 if (data == NULL) { 568 IPFW_RUNLOCK(&layer3_chain); 569 return (ENOSPC); 570 } 571 bcopy(&ptr->id, &data[i], sizeof(int)); 572 i += sizeof(int); 573 bcopy(ptr->lib->logDesc, &data[i], sof); 574 i += sof; 575 } 576 IPFW_RUNLOCK(&layer3_chain); 577 sooptcopyout(sopt, data, size); 578 free(data, M_IPFW); 579 return(0); 580} 581 582static void 583ipfw_nat_init(void) 584{ 585 586 IPFW_WLOCK(&layer3_chain); 587 /* init ipfw hooks */ 588 ipfw_nat_ptr = ipfw_nat; 589 ipfw_nat_cfg_ptr = ipfw_nat_cfg; 590 ipfw_nat_del_ptr = ipfw_nat_del; 591 ipfw_nat_get_cfg_ptr = ipfw_nat_get_cfg; 592 ipfw_nat_get_log_ptr = ipfw_nat_get_log; 593 IPFW_WUNLOCK(&layer3_chain); 594 ifaddr_event_tag = EVENTHANDLER_REGISTER(ifaddr_event, ifaddr_change, 595 NULL, EVENTHANDLER_PRI_ANY); 596} 597 598static void 599ipfw_nat_destroy(void) 600{ 601 struct cfg_nat *ptr, *ptr_temp; 602 603 IPFW_WLOCK(&layer3_chain); 604 LIST_FOREACH_SAFE(ptr, &layer3_chain.nat, _next, ptr_temp) { 605 LIST_REMOVE(ptr, _next); 606 del_redir_spool_cfg(ptr, &ptr->redir_chain); 607 LibAliasUninit(ptr->lib); 608 free(ptr, M_IPFW); 609 } 610 EVENTHANDLER_DEREGISTER(ifaddr_event, ifaddr_event_tag); 611 /* deregister ipfw_nat */ 612 ipfw_nat_ptr = NULL; 613 IPFW_WUNLOCK(&layer3_chain); 614} 615 616static int 617ipfw_nat_modevent(module_t mod, int type, void *unused) 618{ 619 int err = 0; 620 621 switch (type) { 622 case MOD_LOAD: 623 ipfw_nat_init(); 624 break; 625 626 case MOD_UNLOAD: 627 ipfw_nat_destroy(); 628 break; 629 630 default: 631 return EOPNOTSUPP; 632 break; 633 } 634 return err; 635} 636 637static moduledata_t ipfw_nat_mod = { 638 "ipfw_nat", 639 ipfw_nat_modevent, 640 0 641}; 642 643DECLARE_MODULE(ipfw_nat, ipfw_nat_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); 644MODULE_DEPEND(ipfw_nat, libalias, 1, 1, 1); 645MODULE_DEPEND(ipfw_nat, ipfw, 2, 2, 2); 646MODULE_VERSION(ipfw_nat, 1); 647