1/* ip_nat_helper.c - generic support functions for NAT helpers 2 * 3 * (C) 2000-2002 Harald Welte <laforge@netfilter.org> 4 * (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org> 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 2 as 8 * published by the Free Software Foundation. 9 */ 10#include <linux/module.h> 11#include <linux/gfp.h> 12#include <linux/kmod.h> 13#include <linux/types.h> 14#include <linux/timer.h> 15#include <linux/skbuff.h> 16#include <linux/tcp.h> 17#include <linux/udp.h> 18#include <net/checksum.h> 19#include <net/tcp.h> 20#include <net/route.h> 21 22#include <linux/netfilter_ipv4.h> 23#include <net/netfilter/nf_conntrack.h> 24#include <net/netfilter/nf_conntrack_helper.h> 25#include <net/netfilter/nf_conntrack_ecache.h> 26#include <net/netfilter/nf_conntrack_expect.h> 27#include <net/netfilter/nf_nat.h> 28#include <net/netfilter/nf_nat_protocol.h> 29#include <net/netfilter/nf_nat_core.h> 30#include <net/netfilter/nf_nat_helper.h> 31 32#define DUMP_OFFSET(x) \ 33 pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \ 34 x->offset_before, x->offset_after, x->correction_pos); 35 36static DEFINE_SPINLOCK(nf_nat_seqofs_lock); 37 38/* Setup TCP sequence correction given this change at this sequence */ 39static inline void 40adjust_tcp_sequence(u32 seq, 41 int sizediff, 42 struct nf_conn *ct, 43 enum ip_conntrack_info ctinfo) 44{ 45 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 46 struct nf_conn_nat *nat = nfct_nat(ct); 47 struct nf_nat_seq *this_way = &nat->seq[dir]; 48 49 pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", 50 seq, sizediff); 51 52 pr_debug("adjust_tcp_sequence: Seq_offset before: "); 53 DUMP_OFFSET(this_way); 54 55 spin_lock_bh(&nf_nat_seqofs_lock); 56 57 /* SYN adjust. If it's uninitialized, or this is after last 58 * correction, record it: we don't handle more than one 59 * adjustment in the window, but do deal with common case of a 60 * retransmit */ 61 if (this_way->offset_before == this_way->offset_after || 62 before(this_way->correction_pos, seq)) { 63 this_way->correction_pos = seq; 64 this_way->offset_before = this_way->offset_after; 65 this_way->offset_after += sizediff; 66 } 67 spin_unlock_bh(&nf_nat_seqofs_lock); 68 69 pr_debug("adjust_tcp_sequence: Seq_offset after: "); 70 DUMP_OFFSET(this_way); 71} 72 73/* Get the offset value, for conntrack */ 74s16 nf_nat_get_offset(const struct nf_conn *ct, 75 enum ip_conntrack_dir dir, 76 u32 seq) 77{ 78 struct nf_conn_nat *nat = nfct_nat(ct); 79 struct nf_nat_seq *this_way; 80 s16 offset; 81 82 if (!nat) 83 return 0; 84 85 this_way = &nat->seq[dir]; 86 spin_lock_bh(&nf_nat_seqofs_lock); 87 offset = after(seq, this_way->correction_pos) 88 ? this_way->offset_after : this_way->offset_before; 89 spin_unlock_bh(&nf_nat_seqofs_lock); 90 91 return offset; 92} 93EXPORT_SYMBOL_GPL(nf_nat_get_offset); 94 95/* Frobs data inside this packet, which is linear. */ 96static void mangle_contents(struct sk_buff *skb, 97 unsigned int dataoff, 98 unsigned int match_offset, 99 unsigned int match_len, 100 const char *rep_buffer, 101 unsigned int rep_len) 102{ 103 unsigned char *data; 104 105 BUG_ON(skb_is_nonlinear(skb)); 106 data = skb_network_header(skb) + dataoff; 107 108 /* move post-replacement */ 109 memmove(data + match_offset + rep_len, 110 data + match_offset + match_len, 111 skb->tail - (skb->network_header + dataoff + 112 match_offset + match_len)); 113 114 /* insert data from buffer */ 115 memcpy(data + match_offset, rep_buffer, rep_len); 116 117 /* update skb info */ 118 if (rep_len > match_len) { 119 pr_debug("nf_nat_mangle_packet: Extending packet by " 120 "%u from %u bytes\n", rep_len - match_len, skb->len); 121 skb_put(skb, rep_len - match_len); 122 } else { 123 pr_debug("nf_nat_mangle_packet: Shrinking packet from " 124 "%u from %u bytes\n", match_len - rep_len, skb->len); 125 __skb_trim(skb, skb->len + rep_len - match_len); 126 } 127 128 /* fix IP hdr checksum information */ 129 ip_hdr(skb)->tot_len = htons(skb->len); 130 ip_send_check(ip_hdr(skb)); 131} 132 133/* Unusual, but possible case. */ 134static int enlarge_skb(struct sk_buff *skb, unsigned int extra) 135{ 136 if (skb->len + extra > 65535) 137 return 0; 138 139 if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC)) 140 return 0; 141 142 return 1; 143} 144 145void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo, 146 __be32 seq, s16 off) 147{ 148 if (!off) 149 return; 150 set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); 151 adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo); 152 nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); 153} 154EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); 155 156int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, 157 struct nf_conn *ct, 158 enum ip_conntrack_info ctinfo, 159 unsigned int match_offset, 160 unsigned int match_len, 161 const char *rep_buffer, 162 unsigned int rep_len, bool adjust) 163{ 164 struct rtable *rt = skb_rtable(skb); 165 struct iphdr *iph; 166 struct tcphdr *tcph; 167 int oldlen, datalen; 168 169 if (!skb_make_writable(skb, skb->len)) 170 return 0; 171 172 if (rep_len > match_len && 173 rep_len - match_len > skb_tailroom(skb) && 174 !enlarge_skb(skb, rep_len - match_len)) 175 return 0; 176 177 SKB_LINEAR_ASSERT(skb); 178 179 iph = ip_hdr(skb); 180 tcph = (void *)iph + iph->ihl*4; 181 182 oldlen = skb->len - iph->ihl*4; 183 mangle_contents(skb, iph->ihl*4 + tcph->doff*4, 184 match_offset, match_len, rep_buffer, rep_len); 185 186 datalen = skb->len - iph->ihl*4; 187 if (skb->ip_summed != CHECKSUM_PARTIAL) { 188 if (!(rt->rt_flags & RTCF_LOCAL) && 189 skb->dev->features & NETIF_F_V4_CSUM) { 190 skb->ip_summed = CHECKSUM_PARTIAL; 191 skb->csum_start = skb_headroom(skb) + 192 skb_network_offset(skb) + 193 iph->ihl * 4; 194 skb->csum_offset = offsetof(struct tcphdr, check); 195 tcph->check = ~tcp_v4_check(datalen, 196 iph->saddr, iph->daddr, 0); 197 } else { 198 tcph->check = 0; 199 tcph->check = tcp_v4_check(datalen, 200 iph->saddr, iph->daddr, 201 csum_partial(tcph, 202 datalen, 0)); 203 } 204 } else 205 inet_proto_csum_replace2(&tcph->check, skb, 206 htons(oldlen), htons(datalen), 1); 207 208 if (adjust && rep_len != match_len) 209 nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq, 210 (int)rep_len - (int)match_len); 211 212 return 1; 213} 214EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet); 215 216int 217nf_nat_mangle_udp_packet(struct sk_buff *skb, 218 struct nf_conn *ct, 219 enum ip_conntrack_info ctinfo, 220 unsigned int match_offset, 221 unsigned int match_len, 222 const char *rep_buffer, 223 unsigned int rep_len) 224{ 225 struct rtable *rt = skb_rtable(skb); 226 struct iphdr *iph; 227 struct udphdr *udph; 228 int datalen, oldlen; 229 230 /* UDP helpers might accidentally mangle the wrong packet */ 231 iph = ip_hdr(skb); 232 if (skb->len < iph->ihl*4 + sizeof(*udph) + 233 match_offset + match_len) 234 return 0; 235 236 if (!skb_make_writable(skb, skb->len)) 237 return 0; 238 239 if (rep_len > match_len && 240 rep_len - match_len > skb_tailroom(skb) && 241 !enlarge_skb(skb, rep_len - match_len)) 242 return 0; 243 244 iph = ip_hdr(skb); 245 udph = (void *)iph + iph->ihl*4; 246 247 oldlen = skb->len - iph->ihl*4; 248 mangle_contents(skb, iph->ihl*4 + sizeof(*udph), 249 match_offset, match_len, rep_buffer, rep_len); 250 251 /* update the length of the UDP packet */ 252 datalen = skb->len - iph->ihl*4; 253 udph->len = htons(datalen); 254 255 /* fix udp checksum if udp checksum was previously calculated */ 256 if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL) 257 return 1; 258 259 if (skb->ip_summed != CHECKSUM_PARTIAL) { 260 if (!(rt->rt_flags & RTCF_LOCAL) && 261 skb->dev->features & NETIF_F_V4_CSUM) { 262 skb->ip_summed = CHECKSUM_PARTIAL; 263 skb->csum_start = skb_headroom(skb) + 264 skb_network_offset(skb) + 265 iph->ihl * 4; 266 skb->csum_offset = offsetof(struct udphdr, check); 267 udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 268 datalen, IPPROTO_UDP, 269 0); 270 } else { 271 udph->check = 0; 272 udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, 273 datalen, IPPROTO_UDP, 274 csum_partial(udph, 275 datalen, 0)); 276 if (!udph->check) 277 udph->check = CSUM_MANGLED_0; 278 } 279 } else 280 inet_proto_csum_replace2(&udph->check, skb, 281 htons(oldlen), htons(datalen), 1); 282 283 return 1; 284} 285EXPORT_SYMBOL(nf_nat_mangle_udp_packet); 286 287/* Adjust one found SACK option including checksum correction */ 288static void 289sack_adjust(struct sk_buff *skb, 290 struct tcphdr *tcph, 291 unsigned int sackoff, 292 unsigned int sackend, 293 struct nf_nat_seq *natseq) 294{ 295 while (sackoff < sackend) { 296 struct tcp_sack_block_wire *sack; 297 __be32 new_start_seq, new_end_seq; 298 299 sack = (void *)skb->data + sackoff; 300 if (after(ntohl(sack->start_seq) - natseq->offset_before, 301 natseq->correction_pos)) 302 new_start_seq = htonl(ntohl(sack->start_seq) 303 - natseq->offset_after); 304 else 305 new_start_seq = htonl(ntohl(sack->start_seq) 306 - natseq->offset_before); 307 308 if (after(ntohl(sack->end_seq) - natseq->offset_before, 309 natseq->correction_pos)) 310 new_end_seq = htonl(ntohl(sack->end_seq) 311 - natseq->offset_after); 312 else 313 new_end_seq = htonl(ntohl(sack->end_seq) 314 - natseq->offset_before); 315 316 pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", 317 ntohl(sack->start_seq), new_start_seq, 318 ntohl(sack->end_seq), new_end_seq); 319 320 inet_proto_csum_replace4(&tcph->check, skb, 321 sack->start_seq, new_start_seq, 0); 322 inet_proto_csum_replace4(&tcph->check, skb, 323 sack->end_seq, new_end_seq, 0); 324 sack->start_seq = new_start_seq; 325 sack->end_seq = new_end_seq; 326 sackoff += sizeof(*sack); 327 } 328} 329 330/* TCP SACK sequence number adjustment */ 331static inline unsigned int 332nf_nat_sack_adjust(struct sk_buff *skb, 333 struct tcphdr *tcph, 334 struct nf_conn *ct, 335 enum ip_conntrack_info ctinfo) 336{ 337 unsigned int dir, optoff, optend; 338 struct nf_conn_nat *nat = nfct_nat(ct); 339 340 optoff = ip_hdrlen(skb) + sizeof(struct tcphdr); 341 optend = ip_hdrlen(skb) + tcph->doff * 4; 342 343 if (!skb_make_writable(skb, optend)) 344 return 0; 345 346 dir = CTINFO2DIR(ctinfo); 347 348 while (optoff < optend) { 349 /* Usually: option, length. */ 350 unsigned char *op = skb->data + optoff; 351 352 switch (op[0]) { 353 case TCPOPT_EOL: 354 return 1; 355 case TCPOPT_NOP: 356 optoff++; 357 continue; 358 default: 359 /* no partial options */ 360 if (optoff + 1 == optend || 361 optoff + op[1] > optend || 362 op[1] < 2) 363 return 0; 364 if (op[0] == TCPOPT_SACK && 365 op[1] >= 2+TCPOLEN_SACK_PERBLOCK && 366 ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0) 367 sack_adjust(skb, tcph, optoff+2, 368 optoff+op[1], &nat->seq[!dir]); 369 optoff += op[1]; 370 } 371 } 372 return 1; 373} 374 375/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */ 376int 377nf_nat_seq_adjust(struct sk_buff *skb, 378 struct nf_conn *ct, 379 enum ip_conntrack_info ctinfo) 380{ 381 struct tcphdr *tcph; 382 int dir; 383 __be32 newseq, newack; 384 s16 seqoff, ackoff; 385 struct nf_conn_nat *nat = nfct_nat(ct); 386 struct nf_nat_seq *this_way, *other_way; 387 388 dir = CTINFO2DIR(ctinfo); 389 390 this_way = &nat->seq[dir]; 391 other_way = &nat->seq[!dir]; 392 393 if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph))) 394 return 0; 395 396 tcph = (void *)skb->data + ip_hdrlen(skb); 397 if (after(ntohl(tcph->seq), this_way->correction_pos)) 398 seqoff = this_way->offset_after; 399 else 400 seqoff = this_way->offset_before; 401 402 if (after(ntohl(tcph->ack_seq) - other_way->offset_before, 403 other_way->correction_pos)) 404 ackoff = other_way->offset_after; 405 else 406 ackoff = other_way->offset_before; 407 408 newseq = htonl(ntohl(tcph->seq) + seqoff); 409 newack = htonl(ntohl(tcph->ack_seq) - ackoff); 410 411 inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0); 412 inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0); 413 414 pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n", 415 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), 416 ntohl(newack)); 417 418 tcph->seq = newseq; 419 tcph->ack_seq = newack; 420 421 return nf_nat_sack_adjust(skb, tcph, ct, ctinfo); 422} 423 424/* Setup NAT on this expected conntrack so it follows master. */ 425/* If we fail to get a free NAT slot, we'll get dropped on confirm */ 426void nf_nat_follow_master(struct nf_conn *ct, 427 struct nf_conntrack_expect *exp) 428{ 429 struct nf_nat_range range; 430 431 /* This must be a fresh one. */ 432 BUG_ON(ct->status & IPS_NAT_DONE_MASK); 433 434 /* Change src to where master sends to */ 435 range.flags = IP_NAT_RANGE_MAP_IPS; 436 range.min_ip = range.max_ip 437 = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; 438 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC); 439 440 /* For DST manip, map port here to where it's expected. */ 441 range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); 442 range.min = range.max = exp->saved_proto; 443 range.min_ip = range.max_ip 444 = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip; 445 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST); 446} 447EXPORT_SYMBOL(nf_nat_follow_master); 448