/* ip_fw_nat.c revision 201527 */
1/*- 2 * Copyright (c) 2008 Paolo Pisati 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_fw_nat.c 201527 2010-01-04 19:01:22Z luigi $"); 29 30#include <sys/param.h> 31#include <sys/systm.h> 32#include <sys/eventhandler.h> 33#include <sys/malloc.h> 34#include <sys/kernel.h> 35#include <sys/lock.h> 36#include <sys/module.h> 37#include <sys/rwlock.h> 38 39#define IPFW_INTERNAL /* Access to protected data structures in ip_fw.h. 
*/ 40 41#include <netinet/libalias/alias.h> 42#include <netinet/libalias/alias_local.h> 43 44#include <net/if.h> 45#include <netinet/in.h> 46#include <netinet/ip.h> 47#include <netinet/ip_var.h> 48#include <netinet/ip_fw.h> 49#include <netinet/ipfw/ip_fw_private.h> 50#include <netinet/tcp.h> 51#include <netinet/udp.h> 52 53#include <machine/in_cksum.h> /* XXX for in_cksum */ 54 55static VNET_DEFINE(eventhandler_tag, ifaddr_event_tag); 56#define V_ifaddr_event_tag VNET(ifaddr_event_tag) 57 58static void 59ifaddr_change(void *arg __unused, struct ifnet *ifp) 60{ 61 struct cfg_nat *ptr; 62 struct ifaddr *ifa; 63 struct ip_fw_chain *chain; 64 65 chain = &V_layer3_chain; 66 IPFW_WLOCK(chain); 67 /* Check every nat entry... */ 68 LIST_FOREACH(ptr, &chain->nat, _next) { 69 /* ...using nic 'ifp->if_xname' as dynamic alias address. */ 70 if (strncmp(ptr->if_name, ifp->if_xname, IF_NAMESIZE) != 0) 71 continue; 72 if_addr_rlock(ifp); 73 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 74 if (ifa->ifa_addr == NULL) 75 continue; 76 if (ifa->ifa_addr->sa_family != AF_INET) 77 continue; 78 ptr->ip = ((struct sockaddr_in *) 79 (ifa->ifa_addr))->sin_addr; 80 LibAliasSetAddress(ptr->lib, ptr->ip); 81 } 82 if_addr_runlock(ifp); 83 } 84 IPFW_WUNLOCK(chain); 85} 86 87/* 88 * delete the pointers for nat entry ix, or all of them if ix < 0 89 */ 90static void 91flush_nat_ptrs(struct ip_fw_chain *chain, const int ix) 92{ 93 int i; 94 ipfw_insn_nat *cmd; 95 96 IPFW_WLOCK_ASSERT(chain); 97 for (i = 0; i < chain->n_rules; i++) { 98 cmd = (ipfw_insn_nat *)ACTION_PTR(chain->map[i]); 99 /* XXX skip log and the like ? 
*/ 100 if (cmd->o.opcode == O_NAT && cmd->nat != NULL && 101 (ix < 0 || cmd->nat->id == ix)) 102 cmd->nat = NULL; 103 } 104} 105 106static void 107del_redir_spool_cfg(struct cfg_nat *n, struct redir_chain *head) 108{ 109 struct cfg_redir *r, *tmp_r; 110 struct cfg_spool *s, *tmp_s; 111 int i, num; 112 113 LIST_FOREACH_SAFE(r, head, _next, tmp_r) { 114 num = 1; /* Number of alias_link to delete. */ 115 switch (r->mode) { 116 case REDIR_PORT: 117 num = r->pport_cnt; 118 /* FALLTHROUGH */ 119 case REDIR_ADDR: 120 case REDIR_PROTO: 121 /* Delete all libalias redirect entry. */ 122 for (i = 0; i < num; i++) 123 LibAliasRedirectDelete(n->lib, r->alink[i]); 124 /* Del spool cfg if any. */ 125 LIST_FOREACH_SAFE(s, &r->spool_chain, _next, tmp_s) { 126 LIST_REMOVE(s, _next); 127 free(s, M_IPFW); 128 } 129 free(r->alink, M_IPFW); 130 LIST_REMOVE(r, _next); 131 free(r, M_IPFW); 132 break; 133 default: 134 printf("unknown redirect mode: %u\n", r->mode); 135 /* XXX - panic?!?!? */ 136 break; 137 } 138 } 139} 140 141static int 142add_redir_spool_cfg(char *buf, struct cfg_nat *ptr) 143{ 144 struct cfg_redir *r, *ser_r; 145 struct cfg_spool *s, *ser_s; 146 int cnt, off, i; 147 148 for (cnt = 0, off = 0; cnt < ptr->redir_cnt; cnt++) { 149 ser_r = (struct cfg_redir *)&buf[off]; 150 r = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO); 151 memcpy(r, ser_r, SOF_REDIR); 152 LIST_INIT(&r->spool_chain); 153 off += SOF_REDIR; 154 r->alink = malloc(sizeof(struct alias_link *) * r->pport_cnt, 155 M_IPFW, M_WAITOK | M_ZERO); 156 switch (r->mode) { 157 case REDIR_ADDR: 158 r->alink[0] = LibAliasRedirectAddr(ptr->lib, r->laddr, 159 r->paddr); 160 break; 161 case REDIR_PORT: 162 for (i = 0 ; i < r->pport_cnt; i++) { 163 /* If remotePort is all ports, set it to 0. 
*/ 164 u_short remotePortCopy = r->rport + i; 165 if (r->rport_cnt == 1 && r->rport == 0) 166 remotePortCopy = 0; 167 r->alink[i] = LibAliasRedirectPort(ptr->lib, 168 r->laddr, htons(r->lport + i), r->raddr, 169 htons(remotePortCopy), r->paddr, 170 htons(r->pport + i), r->proto); 171 if (r->alink[i] == NULL) { 172 r->alink[0] = NULL; 173 break; 174 } 175 } 176 break; 177 case REDIR_PROTO: 178 r->alink[0] = LibAliasRedirectProto(ptr->lib ,r->laddr, 179 r->raddr, r->paddr, r->proto); 180 break; 181 default: 182 printf("unknown redirect mode: %u\n", r->mode); 183 break; 184 } 185 /* XXX perhaps return an error instead of panic ? */ 186 if (r->alink[0] == NULL) 187 panic("LibAliasRedirect* returned NULL"); 188 /* LSNAT handling. */ 189 for (i = 0; i < r->spool_cnt; i++) { 190 ser_s = (struct cfg_spool *)&buf[off]; 191 s = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO); 192 memcpy(s, ser_s, SOF_SPOOL); 193 LibAliasAddServer(ptr->lib, r->alink[0], 194 s->addr, htons(s->port)); 195 off += SOF_SPOOL; 196 /* Hook spool entry. */ 197 LIST_INSERT_HEAD(&r->spool_chain, s, _next); 198 } 199 /* And finally hook this redir entry. */ 200 LIST_INSERT_HEAD(&ptr->redir_chain, r, _next); 201 } 202 return (1); 203} 204 205static int 206ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m) 207{ 208 struct mbuf *mcl; 209 struct ip *ip; 210 /* XXX - libalias duct tape */ 211 int ldt, retval; 212 char *c; 213 214 ldt = 0; 215 retval = 0; 216 mcl = m_megapullup(m, m->m_pkthdr.len); 217 if (mcl == NULL) { 218 args->m = NULL; 219 return (IP_FW_DENY); 220 } 221 ip = mtod(mcl, struct ip *); 222 223 /* 224 * XXX - Libalias checksum offload 'duct tape': 225 * 226 * locally generated packets have only pseudo-header checksum 227 * calculated and libalias will break it[1], so mark them for 228 * later fix. Moreover there are cases when libalias modifies 229 * tcp packet data[2], mark them for later fix too. 
230 * 231 * [1] libalias was never meant to run in kernel, so it does 232 * not have any knowledge about checksum offloading, and 233 * expects a packet with a full internet checksum. 234 * Unfortunately, packets generated locally will have just the 235 * pseudo header calculated, and when libalias tries to adjust 236 * the checksum it will actually compute a wrong value. 237 * 238 * [2] when libalias modifies tcp's data content, full TCP 239 * checksum has to be recomputed: the problem is that 240 * libalias does not have any idea about checksum offloading. 241 * To work around this, we do not do checksumming in LibAlias, 242 * but only mark the packets in th_x2 field. If we receive a 243 * marked packet, we calculate correct checksum for it 244 * aware of offloading. Why such a terrible hack instead of 245 * recalculating checksum for each packet? 246 * Because the previous checksum was not checked! 247 * Recalculating checksums for EVERY packet will hide ALL 248 * transmission errors. Yes, marked packets still suffer from 249 * this problem. But, sigh, natd(8) has this problem, too. 250 * 251 * TODO: -make libalias mbuf aware (so 252 * it can handle delayed checksum and tso) 253 */ 254 255 if (mcl->m_pkthdr.rcvif == NULL && 256 mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA) 257 ldt = 1; 258 259 c = mtod(mcl, char *); 260 if (args->oif == NULL) 261 retval = LibAliasIn(t->lib, c, 262 mcl->m_len + M_TRAILINGSPACE(mcl)); 263 else 264 retval = LibAliasOut(t->lib, c, 265 mcl->m_len + M_TRAILINGSPACE(mcl)); 266 if (retval == PKT_ALIAS_RESPOND) { 267 m->m_flags |= M_SKIP_FIREWALL; 268 retval = PKT_ALIAS_OK; 269 } 270 if (retval != PKT_ALIAS_OK && 271 retval != PKT_ALIAS_FOUND_HEADER_FRAGMENT) { 272 /* XXX - should i add some logging? 
*/ 273 m_free(mcl); 274 args->m = NULL; 275 return (IP_FW_DENY); 276 } 277 mcl->m_pkthdr.len = mcl->m_len = ntohs(ip->ip_len); 278 279 /* 280 * XXX - libalias checksum offload 281 * 'duct tape' (see above) 282 */ 283 284 if ((ip->ip_off & htons(IP_OFFMASK)) == 0 && 285 ip->ip_p == IPPROTO_TCP) { 286 struct tcphdr *th; 287 288 th = (struct tcphdr *)(ip + 1); 289 if (th->th_x2) 290 ldt = 1; 291 } 292 293 if (ldt) { 294 struct tcphdr *th; 295 struct udphdr *uh; 296 u_short cksum; 297 298 /* XXX check if ip_len can stay in net format */ 299 cksum = in_pseudo( 300 ip->ip_src.s_addr, 301 ip->ip_dst.s_addr, 302 htons(ip->ip_p + ntohs(ip->ip_len) - (ip->ip_hl << 2)) 303 ); 304 305 switch (ip->ip_p) { 306 case IPPROTO_TCP: 307 th = (struct tcphdr *)(ip + 1); 308 /* 309 * Maybe it was set in 310 * libalias... 311 */ 312 th->th_x2 = 0; 313 th->th_sum = cksum; 314 mcl->m_pkthdr.csum_data = 315 offsetof(struct tcphdr, th_sum); 316 break; 317 case IPPROTO_UDP: 318 uh = (struct udphdr *)(ip + 1); 319 uh->uh_sum = cksum; 320 mcl->m_pkthdr.csum_data = 321 offsetof(struct udphdr, uh_sum); 322 break; 323 } 324 /* No hw checksum offloading: do it ourselves */ 325 if ((mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA) == 0) { 326 in_delayed_cksum(mcl); 327 mcl->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 328 } 329 } 330 args->m = mcl; 331 return (IP_FW_NAT); 332} 333 334static struct cfg_nat * 335lookup_nat(struct nat_list *l, int nat_id) 336{ 337 struct cfg_nat *res; 338 339 LIST_FOREACH(res, l, _next) { 340 if (res->id == nat_id) 341 break; 342 } 343 return res; 344} 345 346static int 347ipfw_nat_cfg(struct sockopt *sopt) 348{ 349 struct cfg_nat *ptr, *ser_n; 350 char *buf; 351 struct ip_fw_chain *chain = &V_layer3_chain; 352 353 buf = malloc(NAT_BUF_LEN, M_IPFW, M_WAITOK | M_ZERO); 354 sooptcopyin(sopt, buf, NAT_BUF_LEN, sizeof(struct cfg_nat)); 355 ser_n = (struct cfg_nat *)buf; 356 357 /* check valid parameter ser_n->id > 0 ? */ 358 /* 359 * Find/create nat rule. 
360 */ 361 IPFW_WLOCK(chain); 362 ptr = lookup_nat(&chain->nat, ser_n->id); 363 if (ptr == NULL) { 364 /* New rule: allocate and init new instance. */ 365 ptr = malloc(sizeof(struct cfg_nat), 366 M_IPFW, M_NOWAIT | M_ZERO); 367 if (ptr == NULL) { 368 IPFW_WUNLOCK(chain); 369 free(buf, M_IPFW); 370 return (ENOSPC); 371 } 372 ptr->lib = LibAliasInit(NULL); 373 if (ptr->lib == NULL) { 374 IPFW_WUNLOCK(chain); 375 free(ptr, M_IPFW); 376 free(buf, M_IPFW); 377 return (EINVAL); 378 } 379 LIST_INIT(&ptr->redir_chain); 380 } else { 381 /* Entry already present: temporarly unhook it. */ 382 LIST_REMOVE(ptr, _next); 383 flush_nat_ptrs(chain, ser_n->id); 384 } 385 IPFW_WUNLOCK(chain); 386 387 /* 388 * Basic nat configuration. 389 */ 390 ptr->id = ser_n->id; 391 /* 392 * XXX - what if this rule doesn't nat any ip and just 393 * redirect? 394 * do we set aliasaddress to 0.0.0.0? 395 */ 396 ptr->ip = ser_n->ip; 397 ptr->redir_cnt = ser_n->redir_cnt; 398 ptr->mode = ser_n->mode; 399 LibAliasSetMode(ptr->lib, ser_n->mode, ser_n->mode); 400 LibAliasSetAddress(ptr->lib, ptr->ip); 401 memcpy(ptr->if_name, ser_n->if_name, IF_NAMESIZE); 402 403 /* 404 * Redir and LSNAT configuration. 405 */ 406 /* Delete old cfgs. */ 407 del_redir_spool_cfg(ptr, &ptr->redir_chain); 408 /* Add new entries. 
*/ 409 add_redir_spool_cfg(&buf[(sizeof(struct cfg_nat))], ptr); 410 free(buf, M_IPFW); 411 IPFW_WLOCK(chain); 412 LIST_INSERT_HEAD(&chain->nat, ptr, _next); 413 IPFW_WUNLOCK(chain); 414 return (0); 415} 416 417static int 418ipfw_nat_del(struct sockopt *sopt) 419{ 420 struct cfg_nat *ptr; 421 struct ip_fw_chain *chain = &V_layer3_chain; 422 int i; 423 424 sooptcopyin(sopt, &i, sizeof i, sizeof i); 425 /* XXX validate i */ 426 IPFW_WLOCK(chain); 427 ptr = lookup_nat(&chain->nat, i); 428 if (ptr == NULL) { 429 IPFW_WUNLOCK(chain); 430 return (EINVAL); 431 } 432 LIST_REMOVE(ptr, _next); 433 flush_nat_ptrs(chain, i); 434 IPFW_WUNLOCK(chain); 435 del_redir_spool_cfg(ptr, &ptr->redir_chain); 436 LibAliasUninit(ptr->lib); 437 free(ptr, M_IPFW); 438 return (0); 439} 440 441static int 442ipfw_nat_get_cfg(struct sockopt *sopt) 443{ 444 uint8_t *data; 445 struct cfg_nat *n; 446 struct cfg_redir *r; 447 struct cfg_spool *s; 448 int nat_cnt, off; 449 struct ip_fw_chain *chain; 450 int err = ENOSPC; 451 452 chain = &V_layer3_chain; 453 nat_cnt = 0; 454 off = sizeof(nat_cnt); 455 456 data = malloc(NAT_BUF_LEN, M_IPFW, M_WAITOK | M_ZERO); 457 IPFW_RLOCK(chain); 458 /* Serialize all the data. 
*/ 459 LIST_FOREACH(n, &chain->nat, _next) { 460 nat_cnt++; 461 if (off + SOF_NAT >= NAT_BUF_LEN) 462 goto nospace; 463 bcopy(n, &data[off], SOF_NAT); 464 off += SOF_NAT; 465 LIST_FOREACH(r, &n->redir_chain, _next) { 466 if (off + SOF_REDIR >= NAT_BUF_LEN) 467 goto nospace; 468 bcopy(r, &data[off], SOF_REDIR); 469 off += SOF_REDIR; 470 LIST_FOREACH(s, &r->spool_chain, _next) { 471 if (off + SOF_SPOOL >= NAT_BUF_LEN) 472 goto nospace; 473 bcopy(s, &data[off], SOF_SPOOL); 474 off += SOF_SPOOL; 475 } 476 } 477 } 478 err = 0; /* all good */ 479nospace: 480 IPFW_RUNLOCK(chain); 481 if (err == 0) { 482 bcopy(&nat_cnt, data, sizeof(nat_cnt)); 483 sooptcopyout(sopt, data, NAT_BUF_LEN); 484 } else { 485 printf("serialized data buffer not big enough:" 486 "please increase NAT_BUF_LEN\n"); 487 } 488 free(data, M_IPFW); 489 return (err); 490} 491 492static int 493ipfw_nat_get_log(struct sockopt *sopt) 494{ 495 uint8_t *data; 496 struct cfg_nat *ptr; 497 int i, size; 498 struct ip_fw_chain *chain; 499 500 chain = &V_layer3_chain; 501 502 IPFW_RLOCK(chain); 503 /* one pass to count, one to copy the data */ 504 i = 0; 505 LIST_FOREACH(ptr, &chain->nat, _next) { 506 if (ptr->lib->logDesc == NULL) 507 continue; 508 i++; 509 } 510 size = i * (LIBALIAS_BUF_SIZE + sizeof(int)); 511 data = malloc(size, M_IPFW, M_NOWAIT | M_ZERO); 512 if (data == NULL) { 513 IPFW_RUNLOCK(chain); 514 return (ENOSPC); 515 } 516 i = 0; 517 LIST_FOREACH(ptr, &chain->nat, _next) { 518 if (ptr->lib->logDesc == NULL) 519 continue; 520 bcopy(&ptr->id, &data[i], sizeof(int)); 521 i += sizeof(int); 522 bcopy(ptr->lib->logDesc, &data[i], LIBALIAS_BUF_SIZE); 523 i += LIBALIAS_BUF_SIZE; 524 } 525 IPFW_RUNLOCK(chain); 526 sooptcopyout(sopt, data, size); 527 free(data, M_IPFW); 528 return(0); 529} 530 531static void 532ipfw_nat_init(void) 533{ 534 535 IPFW_WLOCK(&V_layer3_chain); 536 /* init ipfw hooks */ 537 ipfw_nat_ptr = ipfw_nat; 538 lookup_nat_ptr = lookup_nat; 539 ipfw_nat_cfg_ptr = ipfw_nat_cfg; 540 
ipfw_nat_del_ptr = ipfw_nat_del; 541 ipfw_nat_get_cfg_ptr = ipfw_nat_get_cfg; 542 ipfw_nat_get_log_ptr = ipfw_nat_get_log; 543 IPFW_WUNLOCK(&V_layer3_chain); 544 V_ifaddr_event_tag = EVENTHANDLER_REGISTER( 545 ifaddr_event, ifaddr_change, 546 NULL, EVENTHANDLER_PRI_ANY); 547} 548 549static void 550ipfw_nat_destroy(void) 551{ 552 struct cfg_nat *ptr, *ptr_temp; 553 struct ip_fw_chain *chain; 554 555 chain = &V_layer3_chain; 556 IPFW_WLOCK(chain); 557 LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) { 558 LIST_REMOVE(ptr, _next); 559 del_redir_spool_cfg(ptr, &ptr->redir_chain); 560 LibAliasUninit(ptr->lib); 561 free(ptr, M_IPFW); 562 } 563 EVENTHANDLER_DEREGISTER(ifaddr_event, V_ifaddr_event_tag); 564 flush_nat_ptrs(chain, -1 /* flush all */); 565 /* deregister ipfw_nat */ 566 ipfw_nat_ptr = NULL; 567 lookup_nat_ptr = NULL; 568 ipfw_nat_cfg_ptr = NULL; 569 ipfw_nat_del_ptr = NULL; 570 ipfw_nat_get_cfg_ptr = NULL; 571 ipfw_nat_get_log_ptr = NULL; 572 IPFW_WUNLOCK(chain); 573} 574 575static int 576ipfw_nat_modevent(module_t mod, int type, void *unused) 577{ 578 int err = 0; 579 580 switch (type) { 581 case MOD_LOAD: 582 ipfw_nat_init(); 583 break; 584 585 case MOD_UNLOAD: 586 ipfw_nat_destroy(); 587 break; 588 589 default: 590 return EOPNOTSUPP; 591 break; 592 } 593 return err; 594} 595 596static moduledata_t ipfw_nat_mod = { 597 "ipfw_nat", 598 ipfw_nat_modevent, 599 0 600}; 601 602DECLARE_MODULE(ipfw_nat, ipfw_nat_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); 603MODULE_DEPEND(ipfw_nat, libalias, 1, 1, 1); 604MODULE_DEPEND(ipfw_nat, ipfw, 2, 2, 2); 605MODULE_VERSION(ipfw_nat, 1); 606/* end of file */ 607