/* ip_fw_nat.c revision 220837 */
1/*- 2 * Copyright (c) 2008 Paolo Pisati 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_fw_nat.c 220837 2011-04-19 15:06:33Z glebius $"); 29 30#include <sys/param.h> 31#include <sys/systm.h> 32#include <sys/eventhandler.h> 33#include <sys/malloc.h> 34#include <sys/kernel.h> 35#include <sys/lock.h> 36#include <sys/module.h> 37#include <sys/rwlock.h> 38 39#define IPFW_INTERNAL /* Access to protected data structures in ip_fw.h. 
*/ 40 41#include <netinet/libalias/alias.h> 42#include <netinet/libalias/alias_local.h> 43 44#include <net/if.h> 45#include <netinet/in.h> 46#include <netinet/ip.h> 47#include <netinet/ip_var.h> 48#include <netinet/ip_fw.h> 49#include <netinet/ipfw/ip_fw_private.h> 50#include <netinet/tcp.h> 51#include <netinet/udp.h> 52 53#include <machine/in_cksum.h> /* XXX for in_cksum */ 54 55static VNET_DEFINE(eventhandler_tag, ifaddr_event_tag); 56#define V_ifaddr_event_tag VNET(ifaddr_event_tag) 57 58static void 59ifaddr_change(void *arg __unused, struct ifnet *ifp) 60{ 61 struct cfg_nat *ptr; 62 struct ifaddr *ifa; 63 struct ip_fw_chain *chain; 64 65 chain = &V_layer3_chain; 66 IPFW_WLOCK(chain); 67 /* Check every nat entry... */ 68 LIST_FOREACH(ptr, &chain->nat, _next) { 69 /* ...using nic 'ifp->if_xname' as dynamic alias address. */ 70 if (strncmp(ptr->if_name, ifp->if_xname, IF_NAMESIZE) != 0) 71 continue; 72 if_addr_rlock(ifp); 73 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 74 if (ifa->ifa_addr == NULL) 75 continue; 76 if (ifa->ifa_addr->sa_family != AF_INET) 77 continue; 78 ptr->ip = ((struct sockaddr_in *) 79 (ifa->ifa_addr))->sin_addr; 80 LibAliasSetAddress(ptr->lib, ptr->ip); 81 } 82 if_addr_runlock(ifp); 83 } 84 IPFW_WUNLOCK(chain); 85} 86 87/* 88 * delete the pointers for nat entry ix, or all of them if ix < 0 89 */ 90static void 91flush_nat_ptrs(struct ip_fw_chain *chain, const int ix) 92{ 93 int i; 94 ipfw_insn_nat *cmd; 95 96 IPFW_WLOCK_ASSERT(chain); 97 for (i = 0; i < chain->n_rules; i++) { 98 cmd = (ipfw_insn_nat *)ACTION_PTR(chain->map[i]); 99 /* XXX skip log and the like ? 
*/ 100 if (cmd->o.opcode == O_NAT && cmd->nat != NULL && 101 (ix < 0 || cmd->nat->id == ix)) 102 cmd->nat = NULL; 103 } 104} 105 106static void 107del_redir_spool_cfg(struct cfg_nat *n, struct redir_chain *head) 108{ 109 struct cfg_redir *r, *tmp_r; 110 struct cfg_spool *s, *tmp_s; 111 int i, num; 112 113 LIST_FOREACH_SAFE(r, head, _next, tmp_r) { 114 num = 1; /* Number of alias_link to delete. */ 115 switch (r->mode) { 116 case REDIR_PORT: 117 num = r->pport_cnt; 118 /* FALLTHROUGH */ 119 case REDIR_ADDR: 120 case REDIR_PROTO: 121 /* Delete all libalias redirect entry. */ 122 for (i = 0; i < num; i++) 123 LibAliasRedirectDelete(n->lib, r->alink[i]); 124 /* Del spool cfg if any. */ 125 LIST_FOREACH_SAFE(s, &r->spool_chain, _next, tmp_s) { 126 LIST_REMOVE(s, _next); 127 free(s, M_IPFW); 128 } 129 free(r->alink, M_IPFW); 130 LIST_REMOVE(r, _next); 131 free(r, M_IPFW); 132 break; 133 default: 134 printf("unknown redirect mode: %u\n", r->mode); 135 /* XXX - panic?!?!? */ 136 break; 137 } 138 } 139} 140 141static void 142add_redir_spool_cfg(char *buf, struct cfg_nat *ptr) 143{ 144 struct cfg_redir *r, *ser_r; 145 struct cfg_spool *s, *ser_s; 146 int cnt, off, i; 147 148 for (cnt = 0, off = 0; cnt < ptr->redir_cnt; cnt++) { 149 ser_r = (struct cfg_redir *)&buf[off]; 150 r = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO); 151 memcpy(r, ser_r, SOF_REDIR); 152 LIST_INIT(&r->spool_chain); 153 off += SOF_REDIR; 154 r->alink = malloc(sizeof(struct alias_link *) * r->pport_cnt, 155 M_IPFW, M_WAITOK | M_ZERO); 156 switch (r->mode) { 157 case REDIR_ADDR: 158 r->alink[0] = LibAliasRedirectAddr(ptr->lib, r->laddr, 159 r->paddr); 160 break; 161 case REDIR_PORT: 162 for (i = 0 ; i < r->pport_cnt; i++) { 163 /* If remotePort is all ports, set it to 0. 
*/ 164 u_short remotePortCopy = r->rport + i; 165 if (r->rport_cnt == 1 && r->rport == 0) 166 remotePortCopy = 0; 167 r->alink[i] = LibAliasRedirectPort(ptr->lib, 168 r->laddr, htons(r->lport + i), r->raddr, 169 htons(remotePortCopy), r->paddr, 170 htons(r->pport + i), r->proto); 171 if (r->alink[i] == NULL) { 172 r->alink[0] = NULL; 173 break; 174 } 175 } 176 break; 177 case REDIR_PROTO: 178 r->alink[0] = LibAliasRedirectProto(ptr->lib ,r->laddr, 179 r->raddr, r->paddr, r->proto); 180 break; 181 default: 182 printf("unknown redirect mode: %u\n", r->mode); 183 break; 184 } 185 /* XXX perhaps return an error instead of panic ? */ 186 if (r->alink[0] == NULL) 187 panic("LibAliasRedirect* returned NULL"); 188 /* LSNAT handling. */ 189 for (i = 0; i < r->spool_cnt; i++) { 190 ser_s = (struct cfg_spool *)&buf[off]; 191 s = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO); 192 memcpy(s, ser_s, SOF_SPOOL); 193 LibAliasAddServer(ptr->lib, r->alink[0], 194 s->addr, htons(s->port)); 195 off += SOF_SPOOL; 196 /* Hook spool entry. */ 197 LIST_INSERT_HEAD(&r->spool_chain, s, _next); 198 } 199 /* And finally hook this redir entry. */ 200 LIST_INSERT_HEAD(&ptr->redir_chain, r, _next); 201 } 202} 203 204static int 205ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m) 206{ 207 struct mbuf *mcl; 208 struct ip *ip; 209 /* XXX - libalias duct tape */ 210 int ldt, retval; 211 char *c; 212 213 ldt = 0; 214 retval = 0; 215 mcl = m_megapullup(m, m->m_pkthdr.len); 216 if (mcl == NULL) { 217 args->m = NULL; 218 return (IP_FW_DENY); 219 } 220 ip = mtod(mcl, struct ip *); 221 222 /* 223 * XXX - Libalias checksum offload 'duct tape': 224 * 225 * locally generated packets have only pseudo-header checksum 226 * calculated and libalias will break it[1], so mark them for 227 * later fix. Moreover there are cases when libalias modifies 228 * tcp packet data[2], mark them for later fix too. 
229 * 230 * [1] libalias was never meant to run in kernel, so it does 231 * not have any knowledge about checksum offloading, and 232 * expects a packet with a full internet checksum. 233 * Unfortunately, packets generated locally will have just the 234 * pseudo header calculated, and when libalias tries to adjust 235 * the checksum it will actually compute a wrong value. 236 * 237 * [2] when libalias modifies tcp's data content, full TCP 238 * checksum has to be recomputed: the problem is that 239 * libalias does not have any idea about checksum offloading. 240 * To work around this, we do not do checksumming in LibAlias, 241 * but only mark the packets in th_x2 field. If we receive a 242 * marked packet, we calculate correct checksum for it 243 * aware of offloading. Why such a terrible hack instead of 244 * recalculating checksum for each packet? 245 * Because the previous checksum was not checked! 246 * Recalculating checksums for EVERY packet will hide ALL 247 * transmission errors. Yes, marked packets still suffer from 248 * this problem. But, sigh, natd(8) has this problem, too. 249 * 250 * TODO: -make libalias mbuf aware (so 251 * it can handle delayed checksum and tso) 252 */ 253 254 if (mcl->m_pkthdr.rcvif == NULL && 255 mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA) 256 ldt = 1; 257 258 c = mtod(mcl, char *); 259 if (args->oif == NULL) 260 retval = LibAliasIn(t->lib, c, 261 mcl->m_len + M_TRAILINGSPACE(mcl)); 262 else 263 retval = LibAliasOut(t->lib, c, 264 mcl->m_len + M_TRAILINGSPACE(mcl)); 265 if (retval == PKT_ALIAS_RESPOND) { 266 m->m_flags |= M_SKIP_FIREWALL; 267 retval = PKT_ALIAS_OK; 268 } 269 if (retval != PKT_ALIAS_OK && 270 retval != PKT_ALIAS_FOUND_HEADER_FRAGMENT) { 271 /* XXX - should i add some logging? 
*/ 272 m_free(mcl); 273 args->m = NULL; 274 return (IP_FW_DENY); 275 } 276 mcl->m_pkthdr.len = mcl->m_len = ntohs(ip->ip_len); 277 278 /* 279 * XXX - libalias checksum offload 280 * 'duct tape' (see above) 281 */ 282 283 if ((ip->ip_off & htons(IP_OFFMASK)) == 0 && 284 ip->ip_p == IPPROTO_TCP) { 285 struct tcphdr *th; 286 287 th = (struct tcphdr *)(ip + 1); 288 if (th->th_x2) 289 ldt = 1; 290 } 291 292 if (ldt) { 293 struct tcphdr *th; 294 struct udphdr *uh; 295 u_short cksum; 296 297 ip->ip_len = ntohs(ip->ip_len); 298 cksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 299 htons(ip->ip_p + ip->ip_len - (ip->ip_hl << 2))); 300 301 switch (ip->ip_p) { 302 case IPPROTO_TCP: 303 th = (struct tcphdr *)(ip + 1); 304 /* 305 * Maybe it was set in 306 * libalias... 307 */ 308 th->th_x2 = 0; 309 th->th_sum = cksum; 310 mcl->m_pkthdr.csum_data = 311 offsetof(struct tcphdr, th_sum); 312 break; 313 case IPPROTO_UDP: 314 uh = (struct udphdr *)(ip + 1); 315 uh->uh_sum = cksum; 316 mcl->m_pkthdr.csum_data = 317 offsetof(struct udphdr, uh_sum); 318 break; 319 } 320 /* No hw checksum offloading: do it ourselves */ 321 if ((mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA) == 0) { 322 in_delayed_cksum(mcl); 323 mcl->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 324 } 325 ip->ip_len = htons(ip->ip_len); 326 } 327 args->m = mcl; 328 return (IP_FW_NAT); 329} 330 331static struct cfg_nat * 332lookup_nat(struct nat_list *l, int nat_id) 333{ 334 struct cfg_nat *res; 335 336 LIST_FOREACH(res, l, _next) { 337 if (res->id == nat_id) 338 break; 339 } 340 return res; 341} 342 343static int 344ipfw_nat_cfg(struct sockopt *sopt) 345{ 346 struct cfg_nat *cfg, *ptr; 347 char *buf; 348 struct ip_fw_chain *chain = &V_layer3_chain; 349 int gencnt, len, error = 0; 350 351 len = sopt->sopt_valsize; 352 buf = malloc(len, M_TEMP, M_WAITOK | M_ZERO); 353 if ((error = sooptcopyin(sopt, buf, len, sizeof(struct cfg_nat))) != 0) 354 goto out; 355 356 cfg = (struct cfg_nat *)buf; 357 if (cfg->id < 0) { 358 error = 
EINVAL; 359 goto out; 360 } 361 362 /* 363 * Find/create nat rule. 364 */ 365 IPFW_WLOCK(chain); 366 gencnt = chain->gencnt; 367 ptr = lookup_nat(&chain->nat, cfg->id); 368 if (ptr == NULL) { 369 IPFW_WUNLOCK(chain); 370 /* New rule: allocate and init new instance. */ 371 ptr = malloc(sizeof(struct cfg_nat), M_IPFW, M_WAITOK | M_ZERO); 372 ptr->lib = LibAliasInit(NULL); 373 LIST_INIT(&ptr->redir_chain); 374 } else { 375 /* Entry already present: temporarily unhook it. */ 376 LIST_REMOVE(ptr, _next); 377 flush_nat_ptrs(chain, cfg->id); 378 IPFW_WUNLOCK(chain); 379 } 380 381 /* 382 * Basic nat configuration. 383 */ 384 ptr->id = cfg->id; 385 /* 386 * XXX - what if this rule doesn't nat any ip and just 387 * redirect? 388 * do we set aliasaddress to 0.0.0.0? 389 */ 390 ptr->ip = cfg->ip; 391 ptr->redir_cnt = cfg->redir_cnt; 392 ptr->mode = cfg->mode; 393 LibAliasSetMode(ptr->lib, cfg->mode, cfg->mode); 394 LibAliasSetAddress(ptr->lib, ptr->ip); 395 memcpy(ptr->if_name, cfg->if_name, IF_NAMESIZE); 396 397 /* 398 * Redir and LSNAT configuration. 399 */ 400 /* Delete old cfgs. */ 401 del_redir_spool_cfg(ptr, &ptr->redir_chain); 402 /* Add new entries. 
*/ 403 add_redir_spool_cfg(&buf[(sizeof(struct cfg_nat))], ptr); 404 405 IPFW_WLOCK(chain); 406 /* Extra check to avoid race with another ipfw_nat_cfg() */ 407 if (gencnt != chain->gencnt && 408 ((cfg = lookup_nat(&chain->nat, ptr->id)) != NULL)) 409 LIST_REMOVE(cfg, _next); 410 LIST_INSERT_HEAD(&chain->nat, ptr, _next); 411 chain->gencnt++; 412 IPFW_WUNLOCK(chain); 413 414out: 415 free(buf, M_TEMP); 416 return (error); 417} 418 419static int 420ipfw_nat_del(struct sockopt *sopt) 421{ 422 struct cfg_nat *ptr; 423 struct ip_fw_chain *chain = &V_layer3_chain; 424 int i; 425 426 sooptcopyin(sopt, &i, sizeof i, sizeof i); 427 /* XXX validate i */ 428 IPFW_WLOCK(chain); 429 ptr = lookup_nat(&chain->nat, i); 430 if (ptr == NULL) { 431 IPFW_WUNLOCK(chain); 432 return (EINVAL); 433 } 434 LIST_REMOVE(ptr, _next); 435 flush_nat_ptrs(chain, i); 436 IPFW_WUNLOCK(chain); 437 del_redir_spool_cfg(ptr, &ptr->redir_chain); 438 LibAliasUninit(ptr->lib); 439 free(ptr, M_IPFW); 440 return (0); 441} 442 443static int 444ipfw_nat_get_cfg(struct sockopt *sopt) 445{ 446 struct ip_fw_chain *chain = &V_layer3_chain; 447 struct cfg_nat *n; 448 struct cfg_redir *r; 449 struct cfg_spool *s; 450 char *data; 451 int gencnt, nat_cnt, len, error; 452 453 nat_cnt = 0; 454 len = sizeof(nat_cnt); 455 456 IPFW_RLOCK(chain); 457retry: 458 gencnt = chain->gencnt; 459 /* Estimate memory amount */ 460 LIST_FOREACH(n, &chain->nat, _next) { 461 nat_cnt++; 462 len += sizeof(struct cfg_nat); 463 LIST_FOREACH(r, &n->redir_chain, _next) { 464 len += sizeof(struct cfg_redir); 465 LIST_FOREACH(s, &r->spool_chain, _next) 466 len += sizeof(struct cfg_spool); 467 } 468 } 469 IPFW_RUNLOCK(chain); 470 471 data = malloc(len, M_TEMP, M_WAITOK | M_ZERO); 472 bcopy(&nat_cnt, data, sizeof(nat_cnt)); 473 474 nat_cnt = 0; 475 len = sizeof(nat_cnt); 476 477 IPFW_RLOCK(chain); 478 if (gencnt != chain->gencnt) { 479 free(data, M_TEMP); 480 goto retry; 481 } 482 /* Serialize all the data. 
*/ 483 LIST_FOREACH(n, &chain->nat, _next) { 484 bcopy(n, &data[len], sizeof(struct cfg_nat)); 485 len += sizeof(struct cfg_nat); 486 LIST_FOREACH(r, &n->redir_chain, _next) { 487 bcopy(r, &data[len], sizeof(struct cfg_redir)); 488 len += sizeof(struct cfg_redir); 489 LIST_FOREACH(s, &r->spool_chain, _next) { 490 bcopy(s, &data[len], sizeof(struct cfg_spool)); 491 len += sizeof(struct cfg_spool); 492 } 493 } 494 } 495 IPFW_RUNLOCK(chain); 496 497 error = sooptcopyout(sopt, data, len); 498 free(data, M_TEMP); 499 500 return (error); 501} 502 503static int 504ipfw_nat_get_log(struct sockopt *sopt) 505{ 506 uint8_t *data; 507 struct cfg_nat *ptr; 508 int i, size; 509 struct ip_fw_chain *chain; 510 511 chain = &V_layer3_chain; 512 513 IPFW_RLOCK(chain); 514 /* one pass to count, one to copy the data */ 515 i = 0; 516 LIST_FOREACH(ptr, &chain->nat, _next) { 517 if (ptr->lib->logDesc == NULL) 518 continue; 519 i++; 520 } 521 size = i * (LIBALIAS_BUF_SIZE + sizeof(int)); 522 data = malloc(size, M_IPFW, M_NOWAIT | M_ZERO); 523 if (data == NULL) { 524 IPFW_RUNLOCK(chain); 525 return (ENOSPC); 526 } 527 i = 0; 528 LIST_FOREACH(ptr, &chain->nat, _next) { 529 if (ptr->lib->logDesc == NULL) 530 continue; 531 bcopy(&ptr->id, &data[i], sizeof(int)); 532 i += sizeof(int); 533 bcopy(ptr->lib->logDesc, &data[i], LIBALIAS_BUF_SIZE); 534 i += LIBALIAS_BUF_SIZE; 535 } 536 IPFW_RUNLOCK(chain); 537 sooptcopyout(sopt, data, size); 538 free(data, M_IPFW); 539 return(0); 540} 541 542static void 543ipfw_nat_init(void) 544{ 545 546 IPFW_WLOCK(&V_layer3_chain); 547 /* init ipfw hooks */ 548 ipfw_nat_ptr = ipfw_nat; 549 lookup_nat_ptr = lookup_nat; 550 ipfw_nat_cfg_ptr = ipfw_nat_cfg; 551 ipfw_nat_del_ptr = ipfw_nat_del; 552 ipfw_nat_get_cfg_ptr = ipfw_nat_get_cfg; 553 ipfw_nat_get_log_ptr = ipfw_nat_get_log; 554 IPFW_WUNLOCK(&V_layer3_chain); 555 V_ifaddr_event_tag = EVENTHANDLER_REGISTER( 556 ifaddr_event, ifaddr_change, 557 NULL, EVENTHANDLER_PRI_ANY); 558} 559 560static void 
561ipfw_nat_destroy(void) 562{ 563 struct cfg_nat *ptr, *ptr_temp; 564 struct ip_fw_chain *chain; 565 566 chain = &V_layer3_chain; 567 IPFW_WLOCK(chain); 568 LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) { 569 LIST_REMOVE(ptr, _next); 570 del_redir_spool_cfg(ptr, &ptr->redir_chain); 571 LibAliasUninit(ptr->lib); 572 free(ptr, M_IPFW); 573 } 574 EVENTHANDLER_DEREGISTER(ifaddr_event, V_ifaddr_event_tag); 575 flush_nat_ptrs(chain, -1 /* flush all */); 576 /* deregister ipfw_nat */ 577 ipfw_nat_ptr = NULL; 578 lookup_nat_ptr = NULL; 579 ipfw_nat_cfg_ptr = NULL; 580 ipfw_nat_del_ptr = NULL; 581 ipfw_nat_get_cfg_ptr = NULL; 582 ipfw_nat_get_log_ptr = NULL; 583 IPFW_WUNLOCK(chain); 584} 585 586static int 587ipfw_nat_modevent(module_t mod, int type, void *unused) 588{ 589 int err = 0; 590 591 switch (type) { 592 case MOD_LOAD: 593 ipfw_nat_init(); 594 break; 595 596 case MOD_UNLOAD: 597 ipfw_nat_destroy(); 598 break; 599 600 default: 601 return EOPNOTSUPP; 602 break; 603 } 604 return err; 605} 606 607static moduledata_t ipfw_nat_mod = { 608 "ipfw_nat", 609 ipfw_nat_modevent, 610 0 611}; 612 613DECLARE_MODULE(ipfw_nat, ipfw_nat_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); 614MODULE_DEPEND(ipfw_nat, libalias, 1, 1, 1); 615MODULE_DEPEND(ipfw_nat, ipfw, 2, 2, 2); 616MODULE_VERSION(ipfw_nat, 1); 617/* end of file */ 618