ip_fw_nat.c revision 215701
1/*- 2 * Copyright (c) 2008 Paolo Pisati 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_fw_nat.c 215701 2010-11-22 19:32:54Z dim $"); 29 30#include <sys/param.h> 31#include <sys/systm.h> 32#include <sys/eventhandler.h> 33#include <sys/malloc.h> 34#include <sys/kernel.h> 35#include <sys/lock.h> 36#include <sys/module.h> 37#include <sys/rwlock.h> 38 39#define IPFW_INTERNAL /* Access to protected data structures in ip_fw.h. */ 40 41#include <netinet/libalias/alias.h> 42#include <netinet/libalias/alias_local.h> 43 44#include <net/if.h> 45#include <netinet/in.h> 46#include <netinet/ip.h> 47#include <netinet/ip_var.h> 48#include <netinet/ip_fw.h> 49#include <netinet/ipfw/ip_fw_private.h> 50#include <netinet/tcp.h> 51#include <netinet/udp.h> 52 53#include <machine/in_cksum.h> /* XXX for in_cksum */ 54 55static VNET_DEFINE(eventhandler_tag, ifaddr_event_tag); 56#define V_ifaddr_event_tag VNET(ifaddr_event_tag) 57 58static void 59ifaddr_change(void *arg __unused, struct ifnet *ifp) 60{ 61 struct cfg_nat *ptr; 62 struct ifaddr *ifa; 63 struct ip_fw_chain *chain; 64 65 chain = &V_layer3_chain; 66 IPFW_WLOCK(chain); 67 /* Check every nat entry... */ 68 LIST_FOREACH(ptr, &chain->nat, _next) { 69 /* ...using nic 'ifp->if_xname' as dynamic alias address. */ 70 if (strncmp(ptr->if_name, ifp->if_xname, IF_NAMESIZE) != 0) 71 continue; 72 if_addr_rlock(ifp); 73 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 74 if (ifa->ifa_addr == NULL) 75 continue; 76 if (ifa->ifa_addr->sa_family != AF_INET) 77 continue; 78 ptr->ip = ((struct sockaddr_in *) 79 (ifa->ifa_addr))->sin_addr; 80 LibAliasSetAddress(ptr->lib, ptr->ip); 81 } 82 if_addr_runlock(ifp); 83 } 84 IPFW_WUNLOCK(chain); 85} 86 87/* 88 * delete the pointers for nat entry ix, or all of them if ix < 0 89 */ 90static void 91flush_nat_ptrs(struct ip_fw_chain *chain, const int ix) 92{ 93 int i; 94 ipfw_insn_nat *cmd; 95 96 IPFW_WLOCK_ASSERT(chain); 97 for (i = 0; i < chain->n_rules; i++) { 98 cmd = (ipfw_insn_nat *)ACTION_PTR(chain->map[i]); 99 /* XXX skip log and the like ? */ 100 if (cmd->o.opcode == O_NAT && cmd->nat != NULL && 101 (ix < 0 || cmd->nat->id == ix)) 102 cmd->nat = NULL; 103 } 104} 105 106static void 107del_redir_spool_cfg(struct cfg_nat *n, struct redir_chain *head) 108{ 109 struct cfg_redir *r, *tmp_r; 110 struct cfg_spool *s, *tmp_s; 111 int i, num; 112 113 LIST_FOREACH_SAFE(r, head, _next, tmp_r) { 114 num = 1; /* Number of alias_link to delete. */ 115 switch (r->mode) { 116 case REDIR_PORT: 117 num = r->pport_cnt; 118 /* FALLTHROUGH */ 119 case REDIR_ADDR: 120 case REDIR_PROTO: 121 /* Delete all libalias redirect entry. */ 122 for (i = 0; i < num; i++) 123 LibAliasRedirectDelete(n->lib, r->alink[i]); 124 /* Del spool cfg if any. */ 125 LIST_FOREACH_SAFE(s, &r->spool_chain, _next, tmp_s) { 126 LIST_REMOVE(s, _next); 127 free(s, M_IPFW); 128 } 129 free(r->alink, M_IPFW); 130 LIST_REMOVE(r, _next); 131 free(r, M_IPFW); 132 break; 133 default: 134 printf("unknown redirect mode: %u\n", r->mode); 135 /* XXX - panic?!?!? */ 136 break; 137 } 138 } 139} 140 141static int 142add_redir_spool_cfg(char *buf, struct cfg_nat *ptr) 143{ 144 struct cfg_redir *r, *ser_r; 145 struct cfg_spool *s, *ser_s; 146 int cnt, off, i; 147 148 for (cnt = 0, off = 0; cnt < ptr->redir_cnt; cnt++) { 149 ser_r = (struct cfg_redir *)&buf[off]; 150 r = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO); 151 memcpy(r, ser_r, SOF_REDIR); 152 LIST_INIT(&r->spool_chain); 153 off += SOF_REDIR; 154 r->alink = malloc(sizeof(struct alias_link *) * r->pport_cnt, 155 M_IPFW, M_WAITOK | M_ZERO); 156 switch (r->mode) { 157 case REDIR_ADDR: 158 r->alink[0] = LibAliasRedirectAddr(ptr->lib, r->laddr, 159 r->paddr); 160 break; 161 case REDIR_PORT: 162 for (i = 0 ; i < r->pport_cnt; i++) { 163 /* If remotePort is all ports, set it to 0. */ 164 u_short remotePortCopy = r->rport + i; 165 if (r->rport_cnt == 1 && r->rport == 0) 166 remotePortCopy = 0; 167 r->alink[i] = LibAliasRedirectPort(ptr->lib, 168 r->laddr, htons(r->lport + i), r->raddr, 169 htons(remotePortCopy), r->paddr, 170 htons(r->pport + i), r->proto); 171 if (r->alink[i] == NULL) { 172 r->alink[0] = NULL; 173 break; 174 } 175 } 176 break; 177 case REDIR_PROTO: 178 r->alink[0] = LibAliasRedirectProto(ptr->lib ,r->laddr, 179 r->raddr, r->paddr, r->proto); 180 break; 181 default: 182 printf("unknown redirect mode: %u\n", r->mode); 183 break; 184 } 185 /* XXX perhaps return an error instead of panic ? */ 186 if (r->alink[0] == NULL) 187 panic("LibAliasRedirect* returned NULL"); 188 /* LSNAT handling. */ 189 for (i = 0; i < r->spool_cnt; i++) { 190 ser_s = (struct cfg_spool *)&buf[off]; 191 s = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO); 192 memcpy(s, ser_s, SOF_SPOOL); 193 LibAliasAddServer(ptr->lib, r->alink[0], 194 s->addr, htons(s->port)); 195 off += SOF_SPOOL; 196 /* Hook spool entry. */ 197 LIST_INSERT_HEAD(&r->spool_chain, s, _next); 198 } 199 /* And finally hook this redir entry. */ 200 LIST_INSERT_HEAD(&ptr->redir_chain, r, _next); 201 } 202 return (1); 203} 204 205static int 206ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m) 207{ 208 struct mbuf *mcl; 209 struct ip *ip; 210 /* XXX - libalias duct tape */ 211 int ldt, retval; 212 char *c; 213 214 ldt = 0; 215 retval = 0; 216 mcl = m_megapullup(m, m->m_pkthdr.len); 217 if (mcl == NULL) { 218 args->m = NULL; 219 return (IP_FW_DENY); 220 } 221 ip = mtod(mcl, struct ip *); 222 223 /* 224 * XXX - Libalias checksum offload 'duct tape': 225 * 226 * locally generated packets have only pseudo-header checksum 227 * calculated and libalias will break it[1], so mark them for 228 * later fix. Moreover there are cases when libalias modifies 229 * tcp packet data[2], mark them for later fix too. 230 * 231 * [1] libalias was never meant to run in kernel, so it does 232 * not have any knowledge about checksum offloading, and 233 * expects a packet with a full internet checksum. 234 * Unfortunately, packets generated locally will have just the 235 * pseudo header calculated, and when libalias tries to adjust 236 * the checksum it will actually compute a wrong value. 237 * 238 * [2] when libalias modifies tcp's data content, full TCP 239 * checksum has to be recomputed: the problem is that 240 * libalias does not have any idea about checksum offloading. 241 * To work around this, we do not do checksumming in LibAlias, 242 * but only mark the packets in th_x2 field. If we receive a 243 * marked packet, we calculate correct checksum for it 244 * aware of offloading. Why such a terrible hack instead of 245 * recalculating checksum for each packet? 246 * Because the previous checksum was not checked! 247 * Recalculating checksums for EVERY packet will hide ALL 248 * transmission errors. Yes, marked packets still suffer from 249 * this problem. But, sigh, natd(8) has this problem, too. 250 * 251 * TODO: -make libalias mbuf aware (so 252 * it can handle delayed checksum and tso) 253 */ 254 255 if (mcl->m_pkthdr.rcvif == NULL && 256 mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA) 257 ldt = 1; 258 259 c = mtod(mcl, char *); 260 if (args->oif == NULL) 261 retval = LibAliasIn(t->lib, c, 262 mcl->m_len + M_TRAILINGSPACE(mcl)); 263 else 264 retval = LibAliasOut(t->lib, c, 265 mcl->m_len + M_TRAILINGSPACE(mcl)); 266 if (retval == PKT_ALIAS_RESPOND) { 267 m->m_flags |= M_SKIP_FIREWALL; 268 retval = PKT_ALIAS_OK; 269 } 270 if (retval != PKT_ALIAS_OK && 271 retval != PKT_ALIAS_FOUND_HEADER_FRAGMENT) { 272 /* XXX - should i add some logging? */ 273 m_free(mcl); 274 args->m = NULL; 275 return (IP_FW_DENY); 276 } 277 mcl->m_pkthdr.len = mcl->m_len = ntohs(ip->ip_len); 278 279 /* 280 * XXX - libalias checksum offload 281 * 'duct tape' (see above) 282 */ 283 284 if ((ip->ip_off & htons(IP_OFFMASK)) == 0 && 285 ip->ip_p == IPPROTO_TCP) { 286 struct tcphdr *th; 287 288 th = (struct tcphdr *)(ip + 1); 289 if (th->th_x2) 290 ldt = 1; 291 } 292 293 if (ldt) { 294 struct tcphdr *th; 295 struct udphdr *uh; 296 u_short cksum; 297 298 ip->ip_len = ntohs(ip->ip_len); 299 cksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 300 htons(ip->ip_p + ip->ip_len - (ip->ip_hl << 2))); 301 302 switch (ip->ip_p) { 303 case IPPROTO_TCP: 304 th = (struct tcphdr *)(ip + 1); 305 /* 306 * Maybe it was set in 307 * libalias... 308 */ 309 th->th_x2 = 0; 310 th->th_sum = cksum; 311 mcl->m_pkthdr.csum_data = 312 offsetof(struct tcphdr, th_sum); 313 break; 314 case IPPROTO_UDP: 315 uh = (struct udphdr *)(ip + 1); 316 uh->uh_sum = cksum; 317 mcl->m_pkthdr.csum_data = 318 offsetof(struct udphdr, uh_sum); 319 break; 320 } 321 /* No hw checksum offloading: do it ourselves */ 322 if ((mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA) == 0) { 323 in_delayed_cksum(mcl); 324 mcl->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 325 } 326 ip->ip_len = htons(ip->ip_len); 327 } 328 args->m = mcl; 329 return (IP_FW_NAT); 330} 331 332static struct cfg_nat * 333lookup_nat(struct nat_list *l, int nat_id) 334{ 335 struct cfg_nat *res; 336 337 LIST_FOREACH(res, l, _next) { 338 if (res->id == nat_id) 339 break; 340 } 341 return res; 342} 343 344static int 345ipfw_nat_cfg(struct sockopt *sopt) 346{ 347 struct cfg_nat *ptr, *ser_n; 348 char *buf; 349 struct ip_fw_chain *chain = &V_layer3_chain; 350 351 buf = malloc(NAT_BUF_LEN, M_IPFW, M_WAITOK | M_ZERO); 352 sooptcopyin(sopt, buf, NAT_BUF_LEN, sizeof(struct cfg_nat)); 353 ser_n = (struct cfg_nat *)buf; 354 355 /* check valid parameter ser_n->id > 0 ? */ 356 /* 357 * Find/create nat rule. 358 */ 359 IPFW_WLOCK(chain); 360 ptr = lookup_nat(&chain->nat, ser_n->id); 361 if (ptr == NULL) { 362 /* New rule: allocate and init new instance. */ 363 ptr = malloc(sizeof(struct cfg_nat), 364 M_IPFW, M_NOWAIT | M_ZERO); 365 if (ptr == NULL) { 366 IPFW_WUNLOCK(chain); 367 free(buf, M_IPFW); 368 return (ENOSPC); 369 } 370 ptr->lib = LibAliasInit(NULL); 371 if (ptr->lib == NULL) { 372 IPFW_WUNLOCK(chain); 373 free(ptr, M_IPFW); 374 free(buf, M_IPFW); 375 return (EINVAL); 376 } 377 LIST_INIT(&ptr->redir_chain); 378 } else { 379 /* Entry already present: temporarly unhook it. */ 380 LIST_REMOVE(ptr, _next); 381 flush_nat_ptrs(chain, ser_n->id); 382 } 383 IPFW_WUNLOCK(chain); 384 385 /* 386 * Basic nat configuration. 387 */ 388 ptr->id = ser_n->id; 389 /* 390 * XXX - what if this rule doesn't nat any ip and just 391 * redirect? 392 * do we set aliasaddress to 0.0.0.0? 393 */ 394 ptr->ip = ser_n->ip; 395 ptr->redir_cnt = ser_n->redir_cnt; 396 ptr->mode = ser_n->mode; 397 LibAliasSetMode(ptr->lib, ser_n->mode, ser_n->mode); 398 LibAliasSetAddress(ptr->lib, ptr->ip); 399 memcpy(ptr->if_name, ser_n->if_name, IF_NAMESIZE); 400 401 /* 402 * Redir and LSNAT configuration. 403 */ 404 /* Delete old cfgs. */ 405 del_redir_spool_cfg(ptr, &ptr->redir_chain); 406 /* Add new entries. */ 407 add_redir_spool_cfg(&buf[(sizeof(struct cfg_nat))], ptr); 408 free(buf, M_IPFW); 409 IPFW_WLOCK(chain); 410 LIST_INSERT_HEAD(&chain->nat, ptr, _next); 411 IPFW_WUNLOCK(chain); 412 return (0); 413} 414 415static int 416ipfw_nat_del(struct sockopt *sopt) 417{ 418 struct cfg_nat *ptr; 419 struct ip_fw_chain *chain = &V_layer3_chain; 420 int i; 421 422 sooptcopyin(sopt, &i, sizeof i, sizeof i); 423 /* XXX validate i */ 424 IPFW_WLOCK(chain); 425 ptr = lookup_nat(&chain->nat, i); 426 if (ptr == NULL) { 427 IPFW_WUNLOCK(chain); 428 return (EINVAL); 429 } 430 LIST_REMOVE(ptr, _next); 431 flush_nat_ptrs(chain, i); 432 IPFW_WUNLOCK(chain); 433 del_redir_spool_cfg(ptr, &ptr->redir_chain); 434 LibAliasUninit(ptr->lib); 435 free(ptr, M_IPFW); 436 return (0); 437} 438 439static int 440ipfw_nat_get_cfg(struct sockopt *sopt) 441{ 442 uint8_t *data; 443 struct cfg_nat *n; 444 struct cfg_redir *r; 445 struct cfg_spool *s; 446 int nat_cnt, off; 447 struct ip_fw_chain *chain; 448 int err = ENOSPC; 449 450 chain = &V_layer3_chain; 451 nat_cnt = 0; 452 off = sizeof(nat_cnt); 453 454 data = malloc(NAT_BUF_LEN, M_IPFW, M_WAITOK | M_ZERO); 455 IPFW_RLOCK(chain); 456 /* Serialize all the data. */ 457 LIST_FOREACH(n, &chain->nat, _next) { 458 nat_cnt++; 459 if (off + SOF_NAT >= NAT_BUF_LEN) 460 goto nospace; 461 bcopy(n, &data[off], SOF_NAT); 462 off += SOF_NAT; 463 LIST_FOREACH(r, &n->redir_chain, _next) { 464 if (off + SOF_REDIR >= NAT_BUF_LEN) 465 goto nospace; 466 bcopy(r, &data[off], SOF_REDIR); 467 off += SOF_REDIR; 468 LIST_FOREACH(s, &r->spool_chain, _next) { 469 if (off + SOF_SPOOL >= NAT_BUF_LEN) 470 goto nospace; 471 bcopy(s, &data[off], SOF_SPOOL); 472 off += SOF_SPOOL; 473 } 474 } 475 } 476 err = 0; /* all good */ 477nospace: 478 IPFW_RUNLOCK(chain); 479 if (err == 0) { 480 bcopy(&nat_cnt, data, sizeof(nat_cnt)); 481 sooptcopyout(sopt, data, NAT_BUF_LEN); 482 } else { 483 printf("serialized data buffer not big enough:" 484 "please increase NAT_BUF_LEN\n"); 485 } 486 free(data, M_IPFW); 487 return (err); 488} 489 490static int 491ipfw_nat_get_log(struct sockopt *sopt) 492{ 493 uint8_t *data; 494 struct cfg_nat *ptr; 495 int i, size; 496 struct ip_fw_chain *chain; 497 498 chain = &V_layer3_chain; 499 500 IPFW_RLOCK(chain); 501 /* one pass to count, one to copy the data */ 502 i = 0; 503 LIST_FOREACH(ptr, &chain->nat, _next) { 504 if (ptr->lib->logDesc == NULL) 505 continue; 506 i++; 507 } 508 size = i * (LIBALIAS_BUF_SIZE + sizeof(int)); 509 data = malloc(size, M_IPFW, M_NOWAIT | M_ZERO); 510 if (data == NULL) { 511 IPFW_RUNLOCK(chain); 512 return (ENOSPC); 513 } 514 i = 0; 515 LIST_FOREACH(ptr, &chain->nat, _next) { 516 if (ptr->lib->logDesc == NULL) 517 continue; 518 bcopy(&ptr->id, &data[i], sizeof(int)); 519 i += sizeof(int); 520 bcopy(ptr->lib->logDesc, &data[i], LIBALIAS_BUF_SIZE); 521 i += LIBALIAS_BUF_SIZE; 522 } 523 IPFW_RUNLOCK(chain); 524 sooptcopyout(sopt, data, size); 525 free(data, M_IPFW); 526 return(0); 527} 528 529static void 530ipfw_nat_init(void) 531{ 532 533 IPFW_WLOCK(&V_layer3_chain); 534 /* init ipfw hooks */ 535 ipfw_nat_ptr = ipfw_nat; 536 lookup_nat_ptr = lookup_nat; 537 ipfw_nat_cfg_ptr = ipfw_nat_cfg; 538 ipfw_nat_del_ptr = ipfw_nat_del; 539 ipfw_nat_get_cfg_ptr = ipfw_nat_get_cfg; 540 ipfw_nat_get_log_ptr = ipfw_nat_get_log; 541 IPFW_WUNLOCK(&V_layer3_chain); 542 V_ifaddr_event_tag = EVENTHANDLER_REGISTER( 543 ifaddr_event, ifaddr_change, 544 NULL, EVENTHANDLER_PRI_ANY); 545} 546 547static void 548ipfw_nat_destroy(void) 549{ 550 struct cfg_nat *ptr, *ptr_temp; 551 struct ip_fw_chain *chain; 552 553 chain = &V_layer3_chain; 554 IPFW_WLOCK(chain); 555 LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) { 556 LIST_REMOVE(ptr, _next); 557 del_redir_spool_cfg(ptr, &ptr->redir_chain); 558 LibAliasUninit(ptr->lib); 559 free(ptr, M_IPFW); 560 } 561 EVENTHANDLER_DEREGISTER(ifaddr_event, V_ifaddr_event_tag); 562 flush_nat_ptrs(chain, -1 /* flush all */); 563 /* deregister ipfw_nat */ 564 ipfw_nat_ptr = NULL; 565 lookup_nat_ptr = NULL; 566 ipfw_nat_cfg_ptr = NULL; 567 ipfw_nat_del_ptr = NULL; 568 ipfw_nat_get_cfg_ptr = NULL; 569 ipfw_nat_get_log_ptr = NULL; 570 IPFW_WUNLOCK(chain); 571} 572 573static int 574ipfw_nat_modevent(module_t mod, int type, void *unused) 575{ 576 int err = 0; 577 578 switch (type) { 579 case MOD_LOAD: 580 ipfw_nat_init(); 581 break; 582 583 case MOD_UNLOAD: 584 ipfw_nat_destroy(); 585 break; 586 587 default: 588 return EOPNOTSUPP; 589 break; 590 } 591 return err; 592} 593 594static moduledata_t ipfw_nat_mod = { 595 "ipfw_nat", 596 ipfw_nat_modevent, 597 0 598}; 599 600DECLARE_MODULE(ipfw_nat, ipfw_nat_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); 601MODULE_DEPEND(ipfw_nat, libalias, 1, 1, 1); 602MODULE_DEPEND(ipfw_nat, ipfw, 2, 2, 2); 603MODULE_VERSION(ipfw_nat, 1); 604/* end of file */ 605