ip_fw_nat.c revision 220914
1/*- 2 * Copyright (c) 2008 Paolo Pisati 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_fw_nat.c 220914 2011-04-21 08:18:55Z glebius $"); 29 30#include <sys/param.h> 31#include <sys/systm.h> 32#include <sys/eventhandler.h> 33#include <sys/malloc.h> 34#include <sys/kernel.h> 35#include <sys/lock.h> 36#include <sys/module.h> 37#include <sys/rwlock.h> 38 39#define IPFW_INTERNAL /* Access to protected data structures in ip_fw.h. */ 40 41#include <netinet/libalias/alias.h> 42#include <netinet/libalias/alias_local.h> 43 44#include <net/if.h> 45#include <netinet/in.h> 46#include <netinet/ip.h> 47#include <netinet/ip_var.h> 48#include <netinet/ip_fw.h> 49#include <netinet/ipfw/ip_fw_private.h> 50#include <netinet/tcp.h> 51#include <netinet/udp.h> 52 53#include <machine/in_cksum.h> /* XXX for in_cksum */ 54 55static VNET_DEFINE(eventhandler_tag, ifaddr_event_tag); 56#define V_ifaddr_event_tag VNET(ifaddr_event_tag) 57 58static void 59ifaddr_change(void *arg __unused, struct ifnet *ifp) 60{ 61 struct cfg_nat *ptr; 62 struct ifaddr *ifa; 63 struct ip_fw_chain *chain; 64 65 chain = &V_layer3_chain; 66 IPFW_WLOCK(chain); 67 /* Check every nat entry... */ 68 LIST_FOREACH(ptr, &chain->nat, _next) { 69 /* ...using nic 'ifp->if_xname' as dynamic alias address. */ 70 if (strncmp(ptr->if_name, ifp->if_xname, IF_NAMESIZE) != 0) 71 continue; 72 if_addr_rlock(ifp); 73 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 74 if (ifa->ifa_addr == NULL) 75 continue; 76 if (ifa->ifa_addr->sa_family != AF_INET) 77 continue; 78 ptr->ip = ((struct sockaddr_in *) 79 (ifa->ifa_addr))->sin_addr; 80 LibAliasSetAddress(ptr->lib, ptr->ip); 81 } 82 if_addr_runlock(ifp); 83 } 84 IPFW_WUNLOCK(chain); 85} 86 87/* 88 * delete the pointers for nat entry ix, or all of them if ix < 0 89 */ 90static void 91flush_nat_ptrs(struct ip_fw_chain *chain, const int ix) 92{ 93 int i; 94 ipfw_insn_nat *cmd; 95 96 IPFW_WLOCK_ASSERT(chain); 97 for (i = 0; i < chain->n_rules; i++) { 98 cmd = (ipfw_insn_nat *)ACTION_PTR(chain->map[i]); 99 /* XXX skip log and the like ? */ 100 if (cmd->o.opcode == O_NAT && cmd->nat != NULL && 101 (ix < 0 || cmd->nat->id == ix)) 102 cmd->nat = NULL; 103 } 104} 105 106static void 107del_redir_spool_cfg(struct cfg_nat *n, struct redir_chain *head) 108{ 109 struct cfg_redir *r, *tmp_r; 110 struct cfg_spool *s, *tmp_s; 111 int i, num; 112 113 LIST_FOREACH_SAFE(r, head, _next, tmp_r) { 114 num = 1; /* Number of alias_link to delete. */ 115 switch (r->mode) { 116 case REDIR_PORT: 117 num = r->pport_cnt; 118 /* FALLTHROUGH */ 119 case REDIR_ADDR: 120 case REDIR_PROTO: 121 /* Delete all libalias redirect entry. */ 122 for (i = 0; i < num; i++) 123 LibAliasRedirectDelete(n->lib, r->alink[i]); 124 /* Del spool cfg if any. */ 125 LIST_FOREACH_SAFE(s, &r->spool_chain, _next, tmp_s) { 126 LIST_REMOVE(s, _next); 127 free(s, M_IPFW); 128 } 129 free(r->alink, M_IPFW); 130 LIST_REMOVE(r, _next); 131 free(r, M_IPFW); 132 break; 133 default: 134 printf("unknown redirect mode: %u\n", r->mode); 135 /* XXX - panic?!?!? */ 136 break; 137 } 138 } 139} 140 141static void 142add_redir_spool_cfg(char *buf, struct cfg_nat *ptr) 143{ 144 struct cfg_redir *r, *ser_r; 145 struct cfg_spool *s, *ser_s; 146 int cnt, off, i; 147 148 for (cnt = 0, off = 0; cnt < ptr->redir_cnt; cnt++) { 149 ser_r = (struct cfg_redir *)&buf[off]; 150 r = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO); 151 memcpy(r, ser_r, SOF_REDIR); 152 LIST_INIT(&r->spool_chain); 153 off += SOF_REDIR; 154 r->alink = malloc(sizeof(struct alias_link *) * r->pport_cnt, 155 M_IPFW, M_WAITOK | M_ZERO); 156 switch (r->mode) { 157 case REDIR_ADDR: 158 r->alink[0] = LibAliasRedirectAddr(ptr->lib, r->laddr, 159 r->paddr); 160 break; 161 case REDIR_PORT: 162 for (i = 0 ; i < r->pport_cnt; i++) { 163 /* If remotePort is all ports, set it to 0. */ 164 u_short remotePortCopy = r->rport + i; 165 if (r->rport_cnt == 1 && r->rport == 0) 166 remotePortCopy = 0; 167 r->alink[i] = LibAliasRedirectPort(ptr->lib, 168 r->laddr, htons(r->lport + i), r->raddr, 169 htons(remotePortCopy), r->paddr, 170 htons(r->pport + i), r->proto); 171 if (r->alink[i] == NULL) { 172 r->alink[0] = NULL; 173 break; 174 } 175 } 176 break; 177 case REDIR_PROTO: 178 r->alink[0] = LibAliasRedirectProto(ptr->lib ,r->laddr, 179 r->raddr, r->paddr, r->proto); 180 break; 181 default: 182 printf("unknown redirect mode: %u\n", r->mode); 183 break; 184 } 185 /* XXX perhaps return an error instead of panic ? */ 186 if (r->alink[0] == NULL) 187 panic("LibAliasRedirect* returned NULL"); 188 /* LSNAT handling. */ 189 for (i = 0; i < r->spool_cnt; i++) { 190 ser_s = (struct cfg_spool *)&buf[off]; 191 s = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO); 192 memcpy(s, ser_s, SOF_SPOOL); 193 LibAliasAddServer(ptr->lib, r->alink[0], 194 s->addr, htons(s->port)); 195 off += SOF_SPOOL; 196 /* Hook spool entry. */ 197 LIST_INSERT_HEAD(&r->spool_chain, s, _next); 198 } 199 /* And finally hook this redir entry. */ 200 LIST_INSERT_HEAD(&ptr->redir_chain, r, _next); 201 } 202} 203 204static int 205ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m) 206{ 207 struct mbuf *mcl; 208 struct ip *ip; 209 /* XXX - libalias duct tape */ 210 int ldt, retval; 211 char *c; 212 213 ldt = 0; 214 retval = 0; 215 mcl = m_megapullup(m, m->m_pkthdr.len); 216 if (mcl == NULL) { 217 args->m = NULL; 218 return (IP_FW_DENY); 219 } 220 ip = mtod(mcl, struct ip *); 221 222 /* 223 * XXX - Libalias checksum offload 'duct tape': 224 * 225 * locally generated packets have only pseudo-header checksum 226 * calculated and libalias will break it[1], so mark them for 227 * later fix. Moreover there are cases when libalias modifies 228 * tcp packet data[2], mark them for later fix too. 229 * 230 * [1] libalias was never meant to run in kernel, so it does 231 * not have any knowledge about checksum offloading, and 232 * expects a packet with a full internet checksum. 233 * Unfortunately, packets generated locally will have just the 234 * pseudo header calculated, and when libalias tries to adjust 235 * the checksum it will actually compute a wrong value. 236 * 237 * [2] when libalias modifies tcp's data content, full TCP 238 * checksum has to be recomputed: the problem is that 239 * libalias does not have any idea about checksum offloading. 240 * To work around this, we do not do checksumming in LibAlias, 241 * but only mark the packets in th_x2 field. If we receive a 242 * marked packet, we calculate correct checksum for it 243 * aware of offloading. Why such a terrible hack instead of 244 * recalculating checksum for each packet? 245 * Because the previous checksum was not checked! 246 * Recalculating checksums for EVERY packet will hide ALL 247 * transmission errors. Yes, marked packets still suffer from 248 * this problem. But, sigh, natd(8) has this problem, too. 249 * 250 * TODO: -make libalias mbuf aware (so 251 * it can handle delayed checksum and tso) 252 */ 253 254 if (mcl->m_pkthdr.rcvif == NULL && 255 mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA) 256 ldt = 1; 257 258 c = mtod(mcl, char *); 259 if (args->oif == NULL) 260 retval = LibAliasIn(t->lib, c, 261 mcl->m_len + M_TRAILINGSPACE(mcl)); 262 else 263 retval = LibAliasOut(t->lib, c, 264 mcl->m_len + M_TRAILINGSPACE(mcl)); 265 if (retval == PKT_ALIAS_RESPOND) { 266 m->m_flags |= M_SKIP_FIREWALL; 267 retval = PKT_ALIAS_OK; 268 } 269 if (retval != PKT_ALIAS_OK && 270 retval != PKT_ALIAS_FOUND_HEADER_FRAGMENT) { 271 /* XXX - should i add some logging? */ 272 m_free(mcl); 273 args->m = NULL; 274 return (IP_FW_DENY); 275 } 276 mcl->m_pkthdr.len = mcl->m_len = ntohs(ip->ip_len); 277 278 /* 279 * XXX - libalias checksum offload 280 * 'duct tape' (see above) 281 */ 282 283 if ((ip->ip_off & htons(IP_OFFMASK)) == 0 && 284 ip->ip_p == IPPROTO_TCP) { 285 struct tcphdr *th; 286 287 th = (struct tcphdr *)(ip + 1); 288 if (th->th_x2) 289 ldt = 1; 290 } 291 292 if (ldt) { 293 struct tcphdr *th; 294 struct udphdr *uh; 295 u_short cksum; 296 297 ip->ip_len = ntohs(ip->ip_len); 298 cksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 299 htons(ip->ip_p + ip->ip_len - (ip->ip_hl << 2))); 300 301 switch (ip->ip_p) { 302 case IPPROTO_TCP: 303 th = (struct tcphdr *)(ip + 1); 304 /* 305 * Maybe it was set in 306 * libalias... 307 */ 308 th->th_x2 = 0; 309 th->th_sum = cksum; 310 mcl->m_pkthdr.csum_data = 311 offsetof(struct tcphdr, th_sum); 312 break; 313 case IPPROTO_UDP: 314 uh = (struct udphdr *)(ip + 1); 315 uh->uh_sum = cksum; 316 mcl->m_pkthdr.csum_data = 317 offsetof(struct udphdr, uh_sum); 318 break; 319 } 320 /* No hw checksum offloading: do it ourselves */ 321 if ((mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA) == 0) { 322 in_delayed_cksum(mcl); 323 mcl->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 324 } 325 ip->ip_len = htons(ip->ip_len); 326 } 327 args->m = mcl; 328 return (IP_FW_NAT); 329} 330 331static struct cfg_nat * 332lookup_nat(struct nat_list *l, int nat_id) 333{ 334 struct cfg_nat *res; 335 336 LIST_FOREACH(res, l, _next) { 337 if (res->id == nat_id) 338 break; 339 } 340 return res; 341} 342 343static int 344ipfw_nat_cfg(struct sockopt *sopt) 345{ 346 struct cfg_nat *cfg, *ptr; 347 char *buf; 348 struct ip_fw_chain *chain = &V_layer3_chain; 349 size_t len; 350 int gencnt, error = 0; 351 352 len = sopt->sopt_valsize; 353 buf = malloc(len, M_TEMP, M_WAITOK | M_ZERO); 354 if ((error = sooptcopyin(sopt, buf, len, sizeof(struct cfg_nat))) != 0) 355 goto out; 356 357 cfg = (struct cfg_nat *)buf; 358 if (cfg->id < 0) { 359 error = EINVAL; 360 goto out; 361 } 362 363 /* 364 * Find/create nat rule. 365 */ 366 IPFW_WLOCK(chain); 367 gencnt = chain->gencnt; 368 ptr = lookup_nat(&chain->nat, cfg->id); 369 if (ptr == NULL) { 370 IPFW_WUNLOCK(chain); 371 /* New rule: allocate and init new instance. */ 372 ptr = malloc(sizeof(struct cfg_nat), M_IPFW, M_WAITOK | M_ZERO); 373 ptr->lib = LibAliasInit(NULL); 374 LIST_INIT(&ptr->redir_chain); 375 } else { 376 /* Entry already present: temporarily unhook it. */ 377 LIST_REMOVE(ptr, _next); 378 flush_nat_ptrs(chain, cfg->id); 379 IPFW_WUNLOCK(chain); 380 } 381 382 /* 383 * Basic nat configuration. 384 */ 385 ptr->id = cfg->id; 386 /* 387 * XXX - what if this rule doesn't nat any ip and just 388 * redirect? 389 * do we set aliasaddress to 0.0.0.0? 390 */ 391 ptr->ip = cfg->ip; 392 ptr->redir_cnt = cfg->redir_cnt; 393 ptr->mode = cfg->mode; 394 LibAliasSetMode(ptr->lib, cfg->mode, cfg->mode); 395 LibAliasSetAddress(ptr->lib, ptr->ip); 396 memcpy(ptr->if_name, cfg->if_name, IF_NAMESIZE); 397 398 /* 399 * Redir and LSNAT configuration. 400 */ 401 /* Delete old cfgs. */ 402 del_redir_spool_cfg(ptr, &ptr->redir_chain); 403 /* Add new entries. */ 404 add_redir_spool_cfg(&buf[(sizeof(struct cfg_nat))], ptr); 405 406 IPFW_WLOCK(chain); 407 /* Extra check to avoid race with another ipfw_nat_cfg() */ 408 if (gencnt != chain->gencnt && 409 ((cfg = lookup_nat(&chain->nat, ptr->id)) != NULL)) 410 LIST_REMOVE(cfg, _next); 411 LIST_INSERT_HEAD(&chain->nat, ptr, _next); 412 chain->gencnt++; 413 IPFW_WUNLOCK(chain); 414 415out: 416 free(buf, M_TEMP); 417 return (error); 418} 419 420static int 421ipfw_nat_del(struct sockopt *sopt) 422{ 423 struct cfg_nat *ptr; 424 struct ip_fw_chain *chain = &V_layer3_chain; 425 int i; 426 427 sooptcopyin(sopt, &i, sizeof i, sizeof i); 428 /* XXX validate i */ 429 IPFW_WLOCK(chain); 430 ptr = lookup_nat(&chain->nat, i); 431 if (ptr == NULL) { 432 IPFW_WUNLOCK(chain); 433 return (EINVAL); 434 } 435 LIST_REMOVE(ptr, _next); 436 flush_nat_ptrs(chain, i); 437 IPFW_WUNLOCK(chain); 438 del_redir_spool_cfg(ptr, &ptr->redir_chain); 439 LibAliasUninit(ptr->lib); 440 free(ptr, M_IPFW); 441 return (0); 442} 443 444static int 445ipfw_nat_get_cfg(struct sockopt *sopt) 446{ 447 struct ip_fw_chain *chain = &V_layer3_chain; 448 struct cfg_nat *n; 449 struct cfg_redir *r; 450 struct cfg_spool *s; 451 char *data; 452 int gencnt, nat_cnt, len, error; 453 454 nat_cnt = 0; 455 len = sizeof(nat_cnt); 456 457 IPFW_RLOCK(chain); 458retry: 459 gencnt = chain->gencnt; 460 /* Estimate memory amount */ 461 LIST_FOREACH(n, &chain->nat, _next) { 462 nat_cnt++; 463 len += sizeof(struct cfg_nat); 464 LIST_FOREACH(r, &n->redir_chain, _next) { 465 len += sizeof(struct cfg_redir); 466 LIST_FOREACH(s, &r->spool_chain, _next) 467 len += sizeof(struct cfg_spool); 468 } 469 } 470 IPFW_RUNLOCK(chain); 471 472 data = malloc(len, M_TEMP, M_WAITOK | M_ZERO); 473 bcopy(&nat_cnt, data, sizeof(nat_cnt)); 474 475 nat_cnt = 0; 476 len = sizeof(nat_cnt); 477 478 IPFW_RLOCK(chain); 479 if (gencnt != chain->gencnt) { 480 free(data, M_TEMP); 481 goto retry; 482 } 483 /* Serialize all the data. */ 484 LIST_FOREACH(n, &chain->nat, _next) { 485 bcopy(n, &data[len], sizeof(struct cfg_nat)); 486 len += sizeof(struct cfg_nat); 487 LIST_FOREACH(r, &n->redir_chain, _next) { 488 bcopy(r, &data[len], sizeof(struct cfg_redir)); 489 len += sizeof(struct cfg_redir); 490 LIST_FOREACH(s, &r->spool_chain, _next) { 491 bcopy(s, &data[len], sizeof(struct cfg_spool)); 492 len += sizeof(struct cfg_spool); 493 } 494 } 495 } 496 IPFW_RUNLOCK(chain); 497 498 error = sooptcopyout(sopt, data, len); 499 free(data, M_TEMP); 500 501 return (error); 502} 503 504static int 505ipfw_nat_get_log(struct sockopt *sopt) 506{ 507 uint8_t *data; 508 struct cfg_nat *ptr; 509 int i, size; 510 struct ip_fw_chain *chain; 511 512 chain = &V_layer3_chain; 513 514 IPFW_RLOCK(chain); 515 /* one pass to count, one to copy the data */ 516 i = 0; 517 LIST_FOREACH(ptr, &chain->nat, _next) { 518 if (ptr->lib->logDesc == NULL) 519 continue; 520 i++; 521 } 522 size = i * (LIBALIAS_BUF_SIZE + sizeof(int)); 523 data = malloc(size, M_IPFW, M_NOWAIT | M_ZERO); 524 if (data == NULL) { 525 IPFW_RUNLOCK(chain); 526 return (ENOSPC); 527 } 528 i = 0; 529 LIST_FOREACH(ptr, &chain->nat, _next) { 530 if (ptr->lib->logDesc == NULL) 531 continue; 532 bcopy(&ptr->id, &data[i], sizeof(int)); 533 i += sizeof(int); 534 bcopy(ptr->lib->logDesc, &data[i], LIBALIAS_BUF_SIZE); 535 i += LIBALIAS_BUF_SIZE; 536 } 537 IPFW_RUNLOCK(chain); 538 sooptcopyout(sopt, data, size); 539 free(data, M_IPFW); 540 return(0); 541} 542 543static void 544ipfw_nat_init(void) 545{ 546 547 IPFW_WLOCK(&V_layer3_chain); 548 /* init ipfw hooks */ 549 ipfw_nat_ptr = ipfw_nat; 550 lookup_nat_ptr = lookup_nat; 551 ipfw_nat_cfg_ptr = ipfw_nat_cfg; 552 ipfw_nat_del_ptr = ipfw_nat_del; 553 ipfw_nat_get_cfg_ptr = ipfw_nat_get_cfg; 554 ipfw_nat_get_log_ptr = ipfw_nat_get_log; 555 IPFW_WUNLOCK(&V_layer3_chain); 556 V_ifaddr_event_tag = EVENTHANDLER_REGISTER( 557 ifaddr_event, ifaddr_change, 558 NULL, EVENTHANDLER_PRI_ANY); 559} 560 561static void 562ipfw_nat_destroy(void) 563{ 564 struct cfg_nat *ptr, *ptr_temp; 565 struct ip_fw_chain *chain; 566 567 chain = &V_layer3_chain; 568 IPFW_WLOCK(chain); 569 LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) { 570 LIST_REMOVE(ptr, _next); 571 del_redir_spool_cfg(ptr, &ptr->redir_chain); 572 LibAliasUninit(ptr->lib); 573 free(ptr, M_IPFW); 574 } 575 EVENTHANDLER_DEREGISTER(ifaddr_event, V_ifaddr_event_tag); 576 flush_nat_ptrs(chain, -1 /* flush all */); 577 /* deregister ipfw_nat */ 578 ipfw_nat_ptr = NULL; 579 lookup_nat_ptr = NULL; 580 ipfw_nat_cfg_ptr = NULL; 581 ipfw_nat_del_ptr = NULL; 582 ipfw_nat_get_cfg_ptr = NULL; 583 ipfw_nat_get_log_ptr = NULL; 584 IPFW_WUNLOCK(chain); 585} 586 587static int 588ipfw_nat_modevent(module_t mod, int type, void *unused) 589{ 590 int err = 0; 591 592 switch (type) { 593 case MOD_LOAD: 594 ipfw_nat_init(); 595 break; 596 597 case MOD_UNLOAD: 598 ipfw_nat_destroy(); 599 break; 600 601 default: 602 return EOPNOTSUPP; 603 break; 604 } 605 return err; 606} 607 608static moduledata_t ipfw_nat_mod = { 609 "ipfw_nat", 610 ipfw_nat_modevent, 611 0 612}; 613 614DECLARE_MODULE(ipfw_nat, ipfw_nat_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); 615MODULE_DEPEND(ipfw_nat, libalias, 1, 1, 1); 616MODULE_DEPEND(ipfw_nat, ipfw, 2, 2, 2); 617MODULE_VERSION(ipfw_nat, 1); 618/* end of file */ 619