1/* 2 * ip_vs_proto_udp.c: UDP load balancing support for IPVS 3 * 4 * Version: $Id: ip_vs_proto_udp.c,v 1.1.1.1 2007/08/03 18:53:52 Exp $ 5 * 6 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 7 * Julian Anastasov <ja@ssi.bg> 8 * 9 * This program is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU General Public License 11 * as published by the Free Software Foundation; either version 12 * 2 of the License, or (at your option) any later version. 13 * 14 * Changes: 15 * 16 */ 17 18#include <linux/in.h> 19#include <linux/ip.h> 20#include <linux/kernel.h> 21#include <linux/netfilter_ipv4.h> 22#include <linux/udp.h> 23 24#include <net/ip_vs.h> 25#include <net/ip.h> 26 27static struct ip_vs_conn * 28udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, 29 const struct iphdr *iph, unsigned int proto_off, int inverse) 30{ 31 struct ip_vs_conn *cp; 32 __be16 _ports[2], *pptr; 33 34 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); 35 if (pptr == NULL) 36 return NULL; 37 38 if (likely(!inverse)) { 39 cp = ip_vs_conn_in_get(iph->protocol, 40 iph->saddr, pptr[0], 41 iph->daddr, pptr[1]); 42 } else { 43 cp = ip_vs_conn_in_get(iph->protocol, 44 iph->daddr, pptr[1], 45 iph->saddr, pptr[0]); 46 } 47 48 return cp; 49} 50 51 52static struct ip_vs_conn * 53udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, 54 const struct iphdr *iph, unsigned int proto_off, int inverse) 55{ 56 struct ip_vs_conn *cp; 57 __be16 _ports[2], *pptr; 58 59 pptr = skb_header_pointer(skb, ip_hdrlen(skb), 60 sizeof(_ports), _ports); 61 if (pptr == NULL) 62 return NULL; 63 64 if (likely(!inverse)) { 65 cp = ip_vs_conn_out_get(iph->protocol, 66 iph->saddr, pptr[0], 67 iph->daddr, pptr[1]); 68 } else { 69 cp = ip_vs_conn_out_get(iph->protocol, 70 iph->daddr, pptr[1], 71 iph->saddr, pptr[0]); 72 } 73 74 return cp; 75} 76 77 78static int 79udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, 80 int *verdict, struct ip_vs_conn **cpp) 81{ 82 struct ip_vs_service *svc; 83 struct udphdr _udph, *uh; 84 85 uh = skb_header_pointer(skb, ip_hdrlen(skb), 86 sizeof(_udph), &_udph); 87 if (uh == NULL) { 88 *verdict = NF_DROP; 89 return 0; 90 } 91 92 if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol, 93 ip_hdr(skb)->daddr, uh->dest))) { 94 if (ip_vs_todrop()) { 95 /* 96 * It seems that we are very loaded. 97 * We have to drop this packet :( 98 */ 99 ip_vs_service_put(svc); 100 *verdict = NF_DROP; 101 return 0; 102 } 103 104 /* 105 * Let the virtual server select a real server for the 106 * incoming connection, and create a connection entry. 107 */ 108 *cpp = ip_vs_schedule(svc, skb); 109 if (!*cpp) { 110 *verdict = ip_vs_leave(svc, skb, pp); 111 return 0; 112 } 113 ip_vs_service_put(svc); 114 } 115 return 1; 116} 117 118 119static inline void 120udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip, 121 __be16 oldport, __be16 newport) 122{ 123 uhdr->check = 124 csum_fold(ip_vs_check_diff4(oldip, newip, 125 ip_vs_check_diff2(oldport, newport, 126 ~csum_unfold(uhdr->check)))); 127 if (!uhdr->check) 128 uhdr->check = CSUM_MANGLED_0; 129} 130 131static int 132udp_snat_handler(struct sk_buff **pskb, 133 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 134{ 135 struct udphdr *udph; 136 const unsigned int udphoff = ip_hdrlen(*pskb); 137 138 /* csum_check requires unshared skb */ 139 if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph))) 140 return 0; 141 142 if (unlikely(cp->app != NULL)) { 143 /* Some checks before mangling */ 144 if (pp->csum_check && !pp->csum_check(*pskb, pp)) 145 return 0; 146 147 /* 148 * Call application helper if needed 149 */ 150 if (!ip_vs_app_pkt_out(cp, pskb)) 151 return 0; 152 } 153 154 udph = (void *)ip_hdr(*pskb) + udphoff; 155 udph->source = cp->vport; 156 157 /* 158 * Adjust UDP checksums 159 */ 160 if (!cp->app && (udph->check != 0)) { 161 /* Only port and addr are changed, do fast csum update */ 162 udp_fast_csum_update(udph, cp->daddr, cp->vaddr, 163 cp->dport, cp->vport); 164 if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) 165 (*pskb)->ip_summed = CHECKSUM_NONE; 166 } else { 167 /* full checksum calculation */ 168 udph->check = 0; 169 (*pskb)->csum = skb_checksum(*pskb, udphoff, 170 (*pskb)->len - udphoff, 0); 171 udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr, 172 (*pskb)->len - udphoff, 173 cp->protocol, 174 (*pskb)->csum); 175 if (udph->check == 0) 176 udph->check = CSUM_MANGLED_0; 177 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", 178 pp->name, udph->check, 179 (char*)&(udph->check) - (char*)udph); 180 } 181 return 1; 182} 183 184 185static int 186udp_dnat_handler(struct sk_buff **pskb, 187 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 188{ 189 struct udphdr *udph; 190 unsigned int udphoff = ip_hdrlen(*pskb); 191 192 /* csum_check requires unshared skb */ 193 if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph))) 194 return 0; 195 196 if (unlikely(cp->app != NULL)) { 197 /* Some checks before mangling */ 198 if (pp->csum_check && !pp->csum_check(*pskb, pp)) 199 return 0; 200 201 /* 202 * Attempt ip_vs_app call. 203 * It will fix ip_vs_conn 204 */ 205 if (!ip_vs_app_pkt_in(cp, pskb)) 206 return 0; 207 } 208 209 udph = (void *)ip_hdr(*pskb) + udphoff; 210 udph->dest = cp->dport; 211 212 /* 213 * Adjust UDP checksums 214 */ 215 if (!cp->app && (udph->check != 0)) { 216 /* Only port and addr are changed, do fast csum update */ 217 udp_fast_csum_update(udph, cp->vaddr, cp->daddr, 218 cp->vport, cp->dport); 219 if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) 220 (*pskb)->ip_summed = CHECKSUM_NONE; 221 } else { 222 /* full checksum calculation */ 223 udph->check = 0; 224 (*pskb)->csum = skb_checksum(*pskb, udphoff, 225 (*pskb)->len - udphoff, 0); 226 udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr, 227 (*pskb)->len - udphoff, 228 cp->protocol, 229 (*pskb)->csum); 230 if (udph->check == 0) 231 udph->check = CSUM_MANGLED_0; 232 (*pskb)->ip_summed = CHECKSUM_UNNECESSARY; 233 } 234 return 1; 235} 236 237 238static int 239udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) 240{ 241 struct udphdr _udph, *uh; 242 const unsigned int udphoff = ip_hdrlen(skb); 243 244 uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph); 245 if (uh == NULL) 246 return 0; 247 248 if (uh->check != 0) { 249 switch (skb->ip_summed) { 250 case CHECKSUM_NONE: 251 skb->csum = skb_checksum(skb, udphoff, 252 skb->len - udphoff, 0); 253 case CHECKSUM_COMPLETE: 254 if (csum_tcpudp_magic(ip_hdr(skb)->saddr, 255 ip_hdr(skb)->daddr, 256 skb->len - udphoff, 257 ip_hdr(skb)->protocol, 258 skb->csum)) { 259 IP_VS_DBG_RL_PKT(0, pp, skb, 0, 260 "Failed checksum for"); 261 return 0; 262 } 263 break; 264 default: 265 /* No need to checksum. */ 266 break; 267 } 268 } 269 return 1; 270} 271 272 273/* 274 * Note: the caller guarantees that only one of register_app, 275 * unregister_app or app_conn_bind is called each time. 276 */ 277 278#define UDP_APP_TAB_BITS 4 279#define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS) 280#define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1) 281 282static struct list_head udp_apps[UDP_APP_TAB_SIZE]; 283static DEFINE_SPINLOCK(udp_app_lock); 284 285static inline __u16 udp_app_hashkey(__be16 port) 286{ 287 return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port) 288 & UDP_APP_TAB_MASK; 289} 290 291 292static int udp_register_app(struct ip_vs_app *inc) 293{ 294 struct ip_vs_app *i; 295 __u16 hash; 296 __be16 port = inc->port; 297 int ret = 0; 298 299 hash = udp_app_hashkey(port); 300 301 302 spin_lock_bh(&udp_app_lock); 303 list_for_each_entry(i, &udp_apps[hash], p_list) { 304 if (i->port == port) { 305 ret = -EEXIST; 306 goto out; 307 } 308 } 309 list_add(&inc->p_list, &udp_apps[hash]); 310 atomic_inc(&ip_vs_protocol_udp.appcnt); 311 312 out: 313 spin_unlock_bh(&udp_app_lock); 314 return ret; 315} 316 317 318static void 319udp_unregister_app(struct ip_vs_app *inc) 320{ 321 spin_lock_bh(&udp_app_lock); 322 atomic_dec(&ip_vs_protocol_udp.appcnt); 323 list_del(&inc->p_list); 324 spin_unlock_bh(&udp_app_lock); 325} 326 327 328static int udp_app_conn_bind(struct ip_vs_conn *cp) 329{ 330 int hash; 331 struct ip_vs_app *inc; 332 int result = 0; 333 334 /* Default binding: bind app only for NAT */ 335 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) 336 return 0; 337 338 /* Lookup application incarnations and bind the right one */ 339 hash = udp_app_hashkey(cp->vport); 340 341 spin_lock(&udp_app_lock); 342 list_for_each_entry(inc, &udp_apps[hash], p_list) { 343 if (inc->port == cp->vport) { 344 if (unlikely(!ip_vs_app_inc_get(inc))) 345 break; 346 spin_unlock(&udp_app_lock); 347 348 IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->" 349 "%u.%u.%u.%u:%u to app %s on port %u\n", 350 __FUNCTION__, 351 NIPQUAD(cp->caddr), ntohs(cp->cport), 352 NIPQUAD(cp->vaddr), ntohs(cp->vport), 353 inc->name, ntohs(inc->port)); 354 cp->app = inc; 355 if (inc->init_conn) 356 result = inc->init_conn(inc, cp); 357 goto out; 358 } 359 } 360 spin_unlock(&udp_app_lock); 361 362 out: 363 return result; 364} 365 366 367static int udp_timeouts[IP_VS_UDP_S_LAST+1] = { 368 [IP_VS_UDP_S_NORMAL] = 5*60*HZ, 369 [IP_VS_UDP_S_LAST] = 2*HZ, 370}; 371 372static char * udp_state_name_table[IP_VS_UDP_S_LAST+1] = { 373 [IP_VS_UDP_S_NORMAL] = "UDP", 374 [IP_VS_UDP_S_LAST] = "BUG!", 375}; 376 377 378static int 379udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) 380{ 381 return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST, 382 udp_state_name_table, sname, to); 383} 384 385static const char * udp_state_name(int state) 386{ 387 if (state >= IP_VS_UDP_S_LAST) 388 return "ERR!"; 389 return udp_state_name_table[state] ? udp_state_name_table[state] : "?"; 390} 391 392static int 393udp_state_transition(struct ip_vs_conn *cp, int direction, 394 const struct sk_buff *skb, 395 struct ip_vs_protocol *pp) 396{ 397 cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL]; 398 return 1; 399} 400 401static void udp_init(struct ip_vs_protocol *pp) 402{ 403 IP_VS_INIT_HASH_TABLE(udp_apps); 404 pp->timeout_table = udp_timeouts; 405} 406 407static void udp_exit(struct ip_vs_protocol *pp) 408{ 409} 410 411 412struct ip_vs_protocol ip_vs_protocol_udp = { 413 .name = "UDP", 414 .protocol = IPPROTO_UDP, 415 .dont_defrag = 0, 416 .init = udp_init, 417 .exit = udp_exit, 418 .conn_schedule = udp_conn_schedule, 419 .conn_in_get = udp_conn_in_get, 420 .conn_out_get = udp_conn_out_get, 421 .snat_handler = udp_snat_handler, 422 .dnat_handler = udp_dnat_handler, 423 .csum_check = udp_csum_check, 424 .state_transition = udp_state_transition, 425 .state_name = udp_state_name, 426 .register_app = udp_register_app, 427 .unregister_app = udp_unregister_app, 428 .app_conn_bind = udp_app_conn_bind, 429 .debug_packet = ip_vs_tcpudp_debug_packet, 430 .timeout_change = NULL, 431 .set_state_timeout = udp_set_state_timeout, 432}; 433